[llvm-branch-commits] [llvm] DAG: Avoid stack usage in bitcast operand promotion to legal vector (PR #125637)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Feb 3 21:51:41 PST 2025
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/125637
Avoid introducing stack usage when the source operand of a bitcast is an
illegal integer type that is being cast to a legal vector type. This handling
should eventually cover more situations, but this is the first one I noticed.
From 2d1418b0c78e1d1051caceadf35d8edbe70963d9 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 4 Feb 2025 11:41:53 +0700
Subject: [PATCH] DAG: Avoid stack usage in bitcast operand promotion to legal
vector
Fix introducing stack usage if a bitcast source operand is an illegal
integer type cast to a legal vector type. This should cover more
situations, but this is the first one I noticed.
---
.../SelectionDAG/LegalizeIntegerTypes.cpp | 35 +-
.../CodeGen/AMDGPU/bitcast_vector_bigint.ll | 160 --
...ffer-fat-pointers-contents-legalization.ll | 9 -
llvm/test/CodeGen/AMDGPU/ctpop16.ll | 328 +----
llvm/test/CodeGen/AMDGPU/kernel-args.ll | 733 ++--------
llvm/test/CodeGen/AMDGPU/load-constant-i16.ll | 40 +-
llvm/test/CodeGen/AMDGPU/load-constant-i8.ll | 1300 +++--------------
llvm/test/CodeGen/AMDGPU/load-global-i16.ll | 79 +-
llvm/test/CodeGen/AMDGPU/load-global-i8.ll | 48 +-
llvm/test/CodeGen/AMDGPU/min.ll | 306 +---
llvm/test/CodeGen/AMDGPU/shl.ll | 59 +-
llvm/test/CodeGen/AMDGPU/sra.ll | 67 +-
12 files changed, 574 insertions(+), 2590 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 95fb8b406e51bfe..eb0c5faa7fe1eb1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2202,9 +2202,42 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ SDLoc dl(N);
+
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypePromoteInteger: {
+ if (OutVT.isVector()) {
+ EVT EltVT = OutVT.getVectorElementType();
+ TypeSize EltSize = EltVT.getSizeInBits();
+ TypeSize NInSize = NInVT.getSizeInBits();
+
+ if (NInSize.hasKnownScalarFactor(EltSize)) {
+ unsigned NumEltsWithPadding = NInSize.getKnownScalarFactor(EltSize);
+ EVT WideVecVT =
+ EVT::getVectorVT(*DAG.getContext(), EltVT, NumEltsWithPadding);
+
+ if (isTypeLegal(WideVecVT)) {
+ SDValue Promoted = GetPromotedInteger(InOp);
+ SDValue Cast = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Promoted);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, Cast,
+ DAG.getVectorIdxConstant(0, dl));
+ }
+ }
+ }
+
+ break;
+ }
+ default:
+ break;
+ }
+
// This should only occur in unusual situations like bitcasting to an
// x86_fp80, so just turn it into a store+load
- return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+ return CreateStackStoreLoad(InOp, OutVT);
}
SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {
diff --git a/llvm/test/CodeGen/AMDGPU/bitcast_vector_bigint.ll b/llvm/test/CodeGen/AMDGPU/bitcast_vector_bigint.ll
index ab89bb293f6e6ec..2c6aabec7633069 100644
--- a/llvm/test/CodeGen/AMDGPU/bitcast_vector_bigint.ll
+++ b/llvm/test/CodeGen/AMDGPU/bitcast_vector_bigint.ll
@@ -80,15 +80,6 @@ define <5 x i32> @bitcast_i160_to_v5i32(i160 %int) {
; GFX9-LABEL: bitcast_i160_to_v5i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, s33
-; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
-; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
-; GFX9-NEXT: s_mov_b32 s5, s34
-; GFX9-NEXT: s_mov_b32 s34, s32
-; GFX9-NEXT: s_addk_i32 s32, 0x1000
-; GFX9-NEXT: s_mov_b32 s32, s34
-; GFX9-NEXT: s_mov_b32 s34, s5
-; GFX9-NEXT: s_mov_b32 s33, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: bitcast_i160_to_v5i32:
@@ -98,23 +89,6 @@ define <5 x i32> @bitcast_i160_to_v5i32(i160 %int) {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_mov_b32 s0, s33
-; GFX12-NEXT: s_add_co_i32 s33, s32, 31
-; GFX12-NEXT: s_mov_b32 s1, s34
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_and_not1_b32 s33, s33, 31
-; GFX12-NEXT: s_clause 0x1
-; GFX12-NEXT: scratch_store_b64 off, v[2:3], s33 offset:8
-; GFX12-NEXT: scratch_store_b64 off, v[0:1], s33
-; GFX12-NEXT: scratch_load_b128 v[0:3], off, s33
-; GFX12-NEXT: s_mov_b32 s34, s32
-; GFX12-NEXT: s_add_co_i32 s32, s32, 64
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 s32, s34
-; GFX12-NEXT: s_mov_b32 s34, s1
-; GFX12-NEXT: s_mov_b32 s33, s0
-; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_setpc_b64 s[30:31]
%bitcast = bitcast i160 %int to <5 x i32>
ret <5 x i32> %bitcast
@@ -124,15 +98,6 @@ define <6 x i32> @bitcast_i192_to_v6i32(i192 %int) {
; GFX9-LABEL: bitcast_i192_to_v6i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, s33
-; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
-; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
-; GFX9-NEXT: s_mov_b32 s5, s34
-; GFX9-NEXT: s_mov_b32 s34, s32
-; GFX9-NEXT: s_addk_i32 s32, 0x1000
-; GFX9-NEXT: s_mov_b32 s32, s34
-; GFX9-NEXT: s_mov_b32 s34, s5
-; GFX9-NEXT: s_mov_b32 s33, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: bitcast_i192_to_v6i32:
@@ -142,23 +107,6 @@ define <6 x i32> @bitcast_i192_to_v6i32(i192 %int) {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_mov_b32 s0, s33
-; GFX12-NEXT: s_add_co_i32 s33, s32, 31
-; GFX12-NEXT: s_mov_b32 s1, s34
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_and_not1_b32 s33, s33, 31
-; GFX12-NEXT: s_clause 0x1
-; GFX12-NEXT: scratch_store_b64 off, v[2:3], s33 offset:8
-; GFX12-NEXT: scratch_store_b64 off, v[0:1], s33
-; GFX12-NEXT: scratch_load_b128 v[0:3], off, s33
-; GFX12-NEXT: s_mov_b32 s34, s32
-; GFX12-NEXT: s_add_co_i32 s32, s32, 64
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 s32, s34
-; GFX12-NEXT: s_mov_b32 s34, s1
-; GFX12-NEXT: s_mov_b32 s33, s0
-; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_setpc_b64 s[30:31]
%bitcast = bitcast i192 %int to <6 x i32>
ret <6 x i32> %bitcast
@@ -168,15 +116,6 @@ define <7 x i32> @bitcast_i224_to_v7i32(i224 %int) {
; GFX9-LABEL: bitcast_i224_to_v7i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, s33
-; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
-; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
-; GFX9-NEXT: s_mov_b32 s5, s34
-; GFX9-NEXT: s_mov_b32 s34, s32
-; GFX9-NEXT: s_addk_i32 s32, 0x1000
-; GFX9-NEXT: s_mov_b32 s32, s34
-; GFX9-NEXT: s_mov_b32 s34, s5
-; GFX9-NEXT: s_mov_b32 s33, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: bitcast_i224_to_v7i32:
@@ -186,27 +125,6 @@ define <7 x i32> @bitcast_i224_to_v7i32(i224 %int) {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_mov_b32 s0, s33
-; GFX12-NEXT: s_add_co_i32 s33, s32, 31
-; GFX12-NEXT: s_mov_b32 s1, s34
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_and_not1_b32 s33, s33, 31
-; GFX12-NEXT: s_clause 0x1
-; GFX12-NEXT: scratch_store_b64 off, v[2:3], s33 offset:8
-; GFX12-NEXT: scratch_store_b64 off, v[0:1], s33
-; GFX12-NEXT: scratch_load_b128 v[0:3], off, s33
-; GFX12-NEXT: s_clause 0x1
-; GFX12-NEXT: scratch_store_b32 off, v6, s33 offset:24
-; GFX12-NEXT: scratch_store_b64 off, v[4:5], s33 offset:16
-; GFX12-NEXT: scratch_load_b96 v[4:6], off, s33 offset:16
-; GFX12-NEXT: s_mov_b32 s34, s32
-; GFX12-NEXT: s_add_co_i32 s32, s32, 64
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 s32, s34
-; GFX12-NEXT: s_mov_b32 s34, s1
-; GFX12-NEXT: s_mov_b32 s33, s0
-; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_setpc_b64 s[30:31]
%bitcast = bitcast i224 %int to <7 x i32>
ret <7 x i32> %bitcast
@@ -252,15 +170,6 @@ define <3 x i64> @bitcast_i192_to_v3i64(i192 %int) {
; GFX9-LABEL: bitcast_i192_to_v3i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, s33
-; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
-; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
-; GFX9-NEXT: s_mov_b32 s5, s34
-; GFX9-NEXT: s_mov_b32 s34, s32
-; GFX9-NEXT: s_addk_i32 s32, 0x1000
-; GFX9-NEXT: s_mov_b32 s32, s34
-; GFX9-NEXT: s_mov_b32 s34, s5
-; GFX9-NEXT: s_mov_b32 s33, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: bitcast_i192_to_v3i64:
@@ -270,23 +179,6 @@ define <3 x i64> @bitcast_i192_to_v3i64(i192 %int) {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_mov_b32 s0, s33
-; GFX12-NEXT: s_add_co_i32 s33, s32, 31
-; GFX12-NEXT: s_mov_b32 s1, s34
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_and_not1_b32 s33, s33, 31
-; GFX12-NEXT: s_clause 0x1
-; GFX12-NEXT: scratch_store_b64 off, v[2:3], s33 offset:8
-; GFX12-NEXT: scratch_store_b64 off, v[0:1], s33
-; GFX12-NEXT: scratch_load_b128 v[0:3], off, s33
-; GFX12-NEXT: s_mov_b32 s34, s32
-; GFX12-NEXT: s_add_co_i32 s32, s32, 64
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 s32, s34
-; GFX12-NEXT: s_mov_b32 s34, s1
-; GFX12-NEXT: s_mov_b32 s33, s0
-; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_setpc_b64 s[30:31]
%bitcast = bitcast i192 %int to <3 x i64>
ret <3 x i64> %bitcast
@@ -408,15 +300,6 @@ define <5 x float> @bitcast_i160_to_v5f32(i160 %int) {
; GFX9-LABEL: bitcast_i160_to_v5f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, s33
-; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
-; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
-; GFX9-NEXT: s_mov_b32 s5, s34
-; GFX9-NEXT: s_mov_b32 s34, s32
-; GFX9-NEXT: s_addk_i32 s32, 0x1000
-; GFX9-NEXT: s_mov_b32 s32, s34
-; GFX9-NEXT: s_mov_b32 s34, s5
-; GFX9-NEXT: s_mov_b32 s33, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: bitcast_i160_to_v5f32:
@@ -426,23 +309,6 @@ define <5 x float> @bitcast_i160_to_v5f32(i160 %int) {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_mov_b32 s0, s33
-; GFX12-NEXT: s_add_co_i32 s33, s32, 31
-; GFX12-NEXT: s_mov_b32 s1, s34
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_and_not1_b32 s33, s33, 31
-; GFX12-NEXT: s_clause 0x1
-; GFX12-NEXT: scratch_store_b64 off, v[2:3], s33 offset:8
-; GFX12-NEXT: scratch_store_b64 off, v[0:1], s33
-; GFX12-NEXT: scratch_load_b128 v[0:3], off, s33
-; GFX12-NEXT: s_mov_b32 s34, s32
-; GFX12-NEXT: s_add_co_i32 s32, s32, 64
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 s32, s34
-; GFX12-NEXT: s_mov_b32 s34, s1
-; GFX12-NEXT: s_mov_b32 s33, s0
-; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_setpc_b64 s[30:31]
%bitcast = bitcast i160 %int to <5 x float>
ret <5 x float> %bitcast
@@ -452,15 +318,6 @@ define <6 x float> @bitcast_i192_to_v6f32(i192 %int) {
; GFX9-LABEL: bitcast_i192_to_v6f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, s33
-; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0
-; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800
-; GFX9-NEXT: s_mov_b32 s5, s34
-; GFX9-NEXT: s_mov_b32 s34, s32
-; GFX9-NEXT: s_addk_i32 s32, 0x1000
-; GFX9-NEXT: s_mov_b32 s32, s34
-; GFX9-NEXT: s_mov_b32 s34, s5
-; GFX9-NEXT: s_mov_b32 s33, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: bitcast_i192_to_v6f32:
@@ -470,23 +327,6 @@ define <6 x float> @bitcast_i192_to_v6f32(i192 %int) {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_mov_b32 s0, s33
-; GFX12-NEXT: s_add_co_i32 s33, s32, 31
-; GFX12-NEXT: s_mov_b32 s1, s34
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_and_not1_b32 s33, s33, 31
-; GFX12-NEXT: s_clause 0x1
-; GFX12-NEXT: scratch_store_b64 off, v[2:3], s33 offset:8
-; GFX12-NEXT: scratch_store_b64 off, v[0:1], s33
-; GFX12-NEXT: scratch_load_b128 v[0:3], off, s33
-; GFX12-NEXT: s_mov_b32 s34, s32
-; GFX12-NEXT: s_add_co_i32 s32, s32, 64
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 s32, s34
-; GFX12-NEXT: s_mov_b32 s34, s1
-; GFX12-NEXT: s_mov_b32 s33, s0
-; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_setpc_b64 s[30:31]
%bitcast = bitcast i192 %int to <6 x float>
ret <6 x float> %bitcast
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll
index 5f49e69a58ed872..405058b24dcc21e 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll
@@ -3110,17 +3110,8 @@ define void @store_i160(i160 %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_i160:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT: s_mov_b32 s4, s33
-; SDAG-NEXT: s_add_i32 s33, s32, 0x7c0
-; SDAG-NEXT: s_and_b32 s33, s33, 0xfffff800
-; SDAG-NEXT: s_mov_b32 s5, s34
-; SDAG-NEXT: s_mov_b32 s34, s32
-; SDAG-NEXT: s_addk_i32 s32, 0x1000
; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:16
-; SDAG-NEXT: s_mov_b32 s32, s34
-; SDAG-NEXT: s_mov_b32 s34, s5
-; SDAG-NEXT: s_mov_b32 s33, s4
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/ctpop16.ll b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
index 17ab8fc780fb41d..6bf126af5ade237 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop16.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
@@ -457,58 +457,27 @@ define amdgpu_kernel void @v_ctpop_v4i16(ptr addrspace(1) noalias %out, ptr addr
;
; EG-LABEL: v_ctpop_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 3, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 37, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XY, T0.X, 1
+; EG-NEXT: ALU 7, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T6.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_64 T8.XY, T0.X, 0, #1
+; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV T0.Y, T4.X,
-; EG-NEXT: LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W,
-; EG-NEXT: ALU clause starting at 12:
-; EG-NEXT: AND_INT * T0.W, T8.X, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV * T4.X, PV.W,
-; EG-NEXT: MOV T0.X, PV.X,
-; EG-NEXT: LSHR * T0.W, T8.X, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV T4.X, PV.W,
-; EG-NEXT: MOV * T0.X, T5.X,
-; EG-NEXT: AND_INT * T0.W, T8.Y, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV * T5.X, PV.W,
-; EG-NEXT: MOV T0.X, PV.X,
-; EG-NEXT: LSHR * T0.W, T8.Y, literal.x,
+; EG-NEXT: ALU clause starting at 11:
+; EG-NEXT: LSHR * T0.W, T0.X, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT: BCNT_INT T0.Y, PV.W,
+; EG-NEXT: AND_INT * T0.W, T0.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
-; EG-NEXT: OR_INT * T8.Y, T1.W, PV.W,
+; EG-NEXT: BCNT_INT T0.X, PV.W,
+; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: MOV T5.X, PV.Y,
-; EG-NEXT: MOV * T8.X, T4.X,
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i16>, ptr addrspace(1) %in, i32 %tid
%val = load <4 x i16>, ptr addrspace(1) %in.gep, align 16
@@ -601,94 +570,33 @@ define amdgpu_kernel void @v_ctpop_v8i16(ptr addrspace(1) noalias %out, ptr addr
;
; EG-LABEL: v_ctpop_v8i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 3, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 73, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T12.X, 1
+; EG-NEXT: ALU 13, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T8.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T12.XYZW, T0.X, 0, #1
+; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV T0.Y, T4.X,
-; EG-NEXT: LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W,
-; EG-NEXT: ALU clause starting at 12:
-; EG-NEXT: LSHR * T0.W, T12.X, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT * T0.W, PV.W,
-; EG-NEXT: LSHL T0.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
-; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV * T4.X, PV.W,
-; EG-NEXT: MOV T0.X, PV.X,
-; EG-NEXT: AND_INT * T0.W, T12.X, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV T4.X, PV.W,
-; EG-NEXT: MOV * T0.X, T5.X,
-; EG-NEXT: LSHR * T0.W, T12.Y, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T5.X, PV.W,
-; EG-NEXT: MOV T0.X, PV.X,
-; EG-NEXT: AND_INT * T0.W, T12.Y, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.Y, PS, PV.W,
-; EG-NEXT: MOV T5.X, PV.Y,
-; EG-NEXT: MOV * T0.X, T8.X,
-; EG-NEXT: LSHR * T0.W, T12.Z, literal.x,
+; EG-NEXT: ALU clause starting at 11:
+; EG-NEXT: LSHR * T0.W, T0.Z, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T8.X, PV.W,
-; EG-NEXT: MOV T0.X, PV.X,
-; EG-NEXT: AND_INT * T0.W, T12.Z, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Z, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV T8.X, PV.W,
-; EG-NEXT: MOV * T0.X, T9.X,
-; EG-NEXT: LSHR * T0.W, T12.W, literal.x,
+; EG-NEXT: BCNT_INT T0.Z, PS,
+; EG-NEXT: LSHR * T1.W, T0.X, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: BCNT_INT T0.Y, PV.W,
; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T9.X, PV.W,
-; EG-NEXT: MOV T0.X, PV.X,
-; EG-NEXT: AND_INT * T0.W, T12.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: LSHR T12.X, KC0[2].Y, literal.x,
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: BCNT_INT T0.X, PV.W,
+; EG-NEXT: LSHR * T8.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: MOV T9.X, PV.W,
-; EG-NEXT: MOV * T0.X, T4.X,
-; EG-NEXT: MOV * T0.Z, T8.X,
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <8 x i16>, ptr addrspace(1) %in, i32 %tid
%val = load <8 x i16>, ptr addrspace(1) %in.gep, align 32
@@ -837,174 +745,46 @@ define amdgpu_kernel void @v_ctpop_v16i16(ptr addrspace(1) noalias %out, ptr add
;
; EG-LABEL: v_ctpop_v16i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 3, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 1 @8
-; EG-NEXT: ALU 114, @16, KC0[], KC1[]
-; EG-NEXT: ALU 34, @131, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T22.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T21.X, 1
+; EG-NEXT: ALU 2, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @6
+; EG-NEXT: ALU 25, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T14.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T13.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: PAD
-; EG-NEXT: Fetch clause starting at 8:
-; EG-NEXT: VTX_READ_128 T20.XYZW, T0.X, 16, #1
-; EG-NEXT: VTX_READ_128 T21.XYZW, T0.X, 0, #1
-; EG-NEXT: ALU clause starting at 12:
-; EG-NEXT: MOV T0.Y, T4.X,
-; EG-NEXT: LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_128 T12.XYZW, T0.X, 16, #1
+; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
; EG-NEXT: 5(7.006492e-45), 0(0.000000e+00)
; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W,
-; EG-NEXT: ALU clause starting at 16:
-; EG-NEXT: LSHR * T0.W, T20.X, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT * T0.W, PV.W,
-; EG-NEXT: LSHL T0.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
-; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV * T4.X, PV.W,
-; EG-NEXT: MOV T0.X, PV.X,
-; EG-NEXT: AND_INT * T0.W, T20.X, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV T4.X, PV.W,
-; EG-NEXT: MOV * T0.X, T5.X,
-; EG-NEXT: LSHR * T0.W, T20.Y, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T5.X, PV.W,
-; EG-NEXT: MOV T0.X, PV.X,
-; EG-NEXT: AND_INT * T0.W, T20.Y, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.Y, PS, PV.W,
-; EG-NEXT: MOV T5.X, PV.Y,
-; EG-NEXT: MOV * T0.X, T8.X,
-; EG-NEXT: LSHR * T0.W, T20.Z, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T8.X, PV.W,
-; EG-NEXT: MOV T0.X, PV.X,
-; EG-NEXT: AND_INT * T0.W, T20.Z, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV T8.X, PV.W,
-; EG-NEXT: MOV * T0.X, T9.X,
-; EG-NEXT: LSHR * T0.W, T20.W, literal.x,
+; EG-NEXT: ALU clause starting at 13:
+; EG-NEXT: LSHR * T0.W, T12.Z, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
+; EG-NEXT: BCNT_INT T12.W, PV.W,
+; EG-NEXT: AND_INT * T0.W, T12.Z, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: BCNT_INT T12.Z, PS,
+; EG-NEXT: LSHR T0.W, T0.Z, literal.x,
+; EG-NEXT: LSHR * T1.W, T12.X, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T9.X, PV.W,
-; EG-NEXT: MOV T0.X, PV.X,
-; EG-NEXT: AND_INT * T0.W, T20.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T12.Y, PS,
+; EG-NEXT: AND_INT T0.Z, T0.Z, literal.x,
; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV T9.X, PV.W,
-; EG-NEXT: MOV * T0.X, T12.X,
-; EG-NEXT: LSHR * T1.W, T21.X, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T1.W, PV.W,
-; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
-; EG-NEXT: MOV * T12.X, PV.W,
-; EG-NEXT: MOV T0.X, PV.X,
-; EG-NEXT: AND_INT * T1.W, T21.X, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T1.W, PV.W,
-; EG-NEXT: AND_INT * T2.W, PV.X, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T1.W, PS, PV.W,
-; EG-NEXT: MOV T12.X, PV.W,
-; EG-NEXT: MOV * T0.X, T13.X,
-; EG-NEXT: LSHR * T1.W, T21.Y, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T1.W, PV.W,
-; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
-; EG-NEXT: MOV * T13.X, PV.W,
-; EG-NEXT: MOV T0.X, PV.X,
-; EG-NEXT: AND_INT * T1.W, T21.Y, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T1.W, PV.W,
-; EG-NEXT: AND_INT * T2.W, PV.X, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T20.Y, PS, PV.W,
-; EG-NEXT: MOV T13.X, PV.Y,
-; EG-NEXT: MOV * T0.X, T16.X,
-; EG-NEXT: LSHR * T1.W, T21.Z, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T1.W, PV.W,
-; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
-; EG-NEXT: ALU clause starting at 131:
-; EG-NEXT: MOV * T16.X, T1.W,
-; EG-NEXT: MOV T0.X, PV.X,
-; EG-NEXT: AND_INT * T1.W, T21.Z, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T1.W, PV.W,
-; EG-NEXT: AND_INT * T2.W, PV.X, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T1.W, PS, PV.W,
-; EG-NEXT: MOV T16.X, PV.W,
-; EG-NEXT: MOV * T0.X, T17.X,
-; EG-NEXT: LSHR * T1.W, T21.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T1.W, PV.W,
-; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T12.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
+; EG-NEXT: BCNT_INT T12.X, PS,
+; EG-NEXT: BCNT_INT T0.Z, PV.Z,
+; EG-NEXT: LSHR T1.W, T0.X, literal.x,
+; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
-; EG-NEXT: MOV * T17.X, PV.W,
-; EG-NEXT: MOV T0.X, PV.X,
-; EG-NEXT: AND_INT T1.W, T21.W, literal.x,
-; EG-NEXT: LSHR * T21.X, KC0[2].Y, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45)
-; EG-NEXT: AND_INT T0.Z, PV.X, literal.x,
-; EG-NEXT: BCNT_INT T1.W, PV.W,
-; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y,
-; EG-NEXT: -65536(nan), 16(2.242078e-44)
-; EG-NEXT: LSHR T22.X, PS, literal.x,
-; EG-NEXT: OR_INT * T20.W, PV.Z, PV.W,
+; EG-NEXT: LSHR T13.X, PS, literal.x,
+; EG-NEXT: BCNT_INT T0.Y, PV.W,
+; EG-NEXT: AND_INT * T1.W, T0.X, literal.y,
+; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT: BCNT_INT T0.X, PV.W,
+; EG-NEXT: LSHR * T14.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: MOV T17.X, PV.W,
-; EG-NEXT: MOV * T0.X, T4.X,
-; EG-NEXT: MOV * T0.Z, T8.X,
-; EG-NEXT: MOV T20.X, T12.X,
-; EG-NEXT: MOV * T20.Z, T16.X, BS:VEC_120/SCL_212
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <16 x i16>, ptr addrspace(1) %in, i32 %tid
%val = load <16 x i16>, ptr addrspace(1) %in.gep, align 32
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
index 8704f4e780448ba..121891adef1827d 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
@@ -1025,74 +1025,67 @@ define amdgpu_kernel void @v3i16_arg(ptr addrspace(1) nocapture %out, <3 x i16>
;
; EG-LABEL: v3i16_arg:
; EG: ; %bb.0: ; %entry
-; EG-NEXT: ALU 0, @12, KC0[], KC1[]
-; EG-NEXT: TEX 2 @6
-; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
-; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X
+; EG-NEXT: ALU 0, @10, KC0[], KC1[]
+; EG-NEXT: TEX 1 @6
+; EG-NEXT: ALU 14, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T3.X, 0
+; EG-NEXT: MEM_RAT MSKOR T2.XW, T0.X
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_16 T6.X, T5.X, 44, #3
-; EG-NEXT: VTX_READ_16 T7.X, T5.X, 46, #3
-; EG-NEXT: VTX_READ_16 T5.X, T5.X, 48, #3
-; EG-NEXT: ALU clause starting at 12:
-; EG-NEXT: MOV * T5.X, 0.0,
-; EG-NEXT: ALU clause starting at 13:
+; EG-NEXT: VTX_READ_16 T1.X, T0.X, 44, #3
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 48, #3
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.X, 0.0,
+; EG-NEXT: ALU clause starting at 11:
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T2.W, T5.X, literal.y,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
-; EG-NEXT: LSHL T5.X, T2.W, PV.W,
-; EG-NEXT: LSHL * T5.W, literal.x, PV.W,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MOV T5.Y, 0.0,
-; EG-NEXT: MOV * T5.Z, 0.0,
-; EG-NEXT: LSHR T8.X, T0.W, literal.x,
-; EG-NEXT: LSHL T0.W, T7.X, literal.y,
-; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
-; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT: LSHL T2.X, T2.W, PV.W,
+; EG-NEXT: LSHL * T2.W, literal.x, PV.W,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT T6.X, PV.W, PS,
-; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
+; EG-NEXT: MOV T2.Y, 0.0,
+; EG-NEXT: MOV * T2.Z, 0.0,
+; EG-NEXT: LSHR T0.X, T0.W, literal.x,
+; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: v3i16_arg:
; CM: ; %bb.0: ; %entry
; CM-NEXT: ALU 0, @12, KC0[], KC1[]
-; CM-NEXT: TEX 2 @6
-; CM-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT MSKOR T5.XW, T8.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6.X, T7.X
+; CM-NEXT: TEX 0 @8
+; CM-NEXT: ALU 13, @13, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT MSKOR T1.XW, T2.X
+; CM-NEXT: ALU 1, @27, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 0 @10
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT: CF_END
-; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_16 T6.X, T5.X, 44, #3
-; CM-NEXT: VTX_READ_16 T7.X, T5.X, 46, #3
-; CM-NEXT: VTX_READ_16 T5.X, T5.X, 48, #3
+; CM-NEXT: Fetch clause starting at 8:
+; CM-NEXT: VTX_READ_16 T1.X, T0.X, 48, #3
+; CM-NEXT: Fetch clause starting at 10:
+; CM-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3
; CM-NEXT: ALU clause starting at 12:
-; CM-NEXT: MOV * T5.X, 0.0,
+; CM-NEXT: MOV * T0.X, 0.0,
; CM-NEXT: ALU clause starting at 13:
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; CM-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; CM-NEXT: AND_INT * T1.W, PV.W, literal.x,
; CM-NEXT: 3(4.203895e-45), 0(0.000000e+00)
-; CM-NEXT: AND_INT T0.Z, T5.X, literal.x,
+; CM-NEXT: AND_INT T0.Z, T1.X, literal.x,
; CM-NEXT: LSHL * T1.W, PV.W, literal.y,
; CM-NEXT: 65535(9.183409e-41), 3(4.203895e-45)
-; CM-NEXT: LSHL T5.X, PV.Z, PV.W,
-; CM-NEXT: LSHL * T5.W, literal.x, PV.W,
+; CM-NEXT: LSHL T1.X, PV.Z, PV.W,
+; CM-NEXT: LSHL * T1.W, literal.x, PV.W,
; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; CM-NEXT: MOV T5.Y, 0.0,
-; CM-NEXT: MOV * T5.Z, 0.0,
-; CM-NEXT: LSHL T0.Z, T7.X, literal.x,
-; CM-NEXT: AND_INT * T1.W, T6.X, literal.y, BS:VEC_120/SCL_212
-; CM-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
-; CM-NEXT: OR_INT * T6.X, PV.Z, PV.W,
-; CM-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
+; CM-NEXT: MOV T1.Y, 0.0,
+; CM-NEXT: MOV * T1.Z, 0.0,
+; CM-NEXT: LSHR * T2.X, T0.W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: LSHR * T8.X, T0.W, literal.x,
+; CM-NEXT: ALU clause starting at 27:
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
store <3 x i16> %in, ptr addrspace(1) %out, align 4
@@ -2676,205 +2669,47 @@ define amdgpu_kernel void @v8i16_arg(ptr addrspace(1) %out, <8 x i16> %in) {
;
; EG-LABEL: v8i16_arg:
; EG: ; %bb.0: ; %entry
-; EG-NEXT: ALU 1, @36, KC0[], KC1[]
-; EG-NEXT: TEX 0 @20
-; EG-NEXT: ALU 5, @38, KC0[], KC1[]
-; EG-NEXT: TEX 0 @22
-; EG-NEXT: ALU 5, @44, KC0[], KC1[]
-; EG-NEXT: TEX 0 @24
-; EG-NEXT: ALU 5, @50, KC0[], KC1[]
-; EG-NEXT: TEX 0 @26
-; EG-NEXT: ALU 5, @56, KC0[], KC1[]
-; EG-NEXT: TEX 0 @28
-; EG-NEXT: ALU 5, @62, KC0[], KC1[]
-; EG-NEXT: TEX 0 @30
-; EG-NEXT: ALU 5, @68, KC0[], KC1[]
-; EG-NEXT: TEX 0 @32
-; EG-NEXT: ALU 5, @74, KC0[], KC1[]
-; EG-NEXT: TEX 0 @34
-; EG-NEXT: ALU 8, @80, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 1
+; EG-NEXT: ALU 0, @14, KC0[], KC1[]
+; EG-NEXT: TEX 3 @6
+; EG-NEXT: ALU 4, @15, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
-; EG-NEXT: Fetch clause starting at 20:
-; EG-NEXT: VTX_READ_16 T8.X, T7.X, 66, #3
-; EG-NEXT: Fetch clause starting at 22:
-; EG-NEXT: VTX_READ_16 T8.X, T7.X, 58, #3
-; EG-NEXT: Fetch clause starting at 24:
-; EG-NEXT: VTX_READ_16 T8.X, T7.X, 64, #3
-; EG-NEXT: Fetch clause starting at 26:
-; EG-NEXT: VTX_READ_16 T8.X, T7.X, 56, #3
-; EG-NEXT: Fetch clause starting at 28:
-; EG-NEXT: VTX_READ_16 T8.X, T7.X, 62, #3
-; EG-NEXT: Fetch clause starting at 30:
-; EG-NEXT: VTX_READ_16 T8.X, T7.X, 54, #3
-; EG-NEXT: Fetch clause starting at 32:
-; EG-NEXT: VTX_READ_16 T8.X, T7.X, 60, #3
-; EG-NEXT: Fetch clause starting at 34:
-; EG-NEXT: VTX_READ_16 T7.X, T7.X, 52, #3
-; EG-NEXT: ALU clause starting at 36:
-; EG-NEXT: MOV * T0.Y, T3.X,
-; EG-NEXT: MOV * T7.X, 0.0,
-; EG-NEXT: ALU clause starting at 38:
-; EG-NEXT: LSHL T0.W, T8.X, literal.x,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
-; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV T3.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T5.X,
-; EG-NEXT: ALU clause starting at 44:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: LSHL * T1.W, T8.X, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T5.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T3.X,
-; EG-NEXT: ALU clause starting at 50:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, T8.X, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T3.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T5.X,
-; EG-NEXT: ALU clause starting at 56:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, T8.X, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T5.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T2.X,
-; EG-NEXT: ALU clause starting at 62:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: LSHL * T1.W, T8.X, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T2.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T4.X,
-; EG-NEXT: ALU clause starting at 68:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: LSHL * T1.W, T8.X, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T4.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T2.X,
-; EG-NEXT: ALU clause starting at 74:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, T8.X, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T7.Z, PV.W, PS,
-; EG-NEXT: MOV T2.X, PV.Z,
-; EG-NEXT: MOV * T0.Y, T4.X,
-; EG-NEXT: ALU clause starting at 80:
-; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.y,
-; EG-NEXT: AND_INT * T1.W, T7.X, literal.z,
-; EG-NEXT: 2(2.802597e-45), -65536(nan)
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T7.X, PV.W, PS,
-; EG-NEXT: MOV T4.X, PV.X,
-; EG-NEXT: MOV * T7.W, T3.X,
-; EG-NEXT: MOV * T7.Y, T5.X,
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_16 T1.X, T0.X, 52, #3
+; EG-NEXT: VTX_READ_16 T2.X, T0.X, 54, #3
+; EG-NEXT: VTX_READ_16 T3.X, T0.X, 62, #3
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 60, #3
+; EG-NEXT: ALU clause starting at 14:
+; EG-NEXT: MOV * T0.X, 0.0,
+; EG-NEXT: ALU clause starting at 15:
+; EG-NEXT: MOV T1.Y, T2.X,
+; EG-NEXT: MOV * T1.Z, T0.X, BS:VEC_120/SCL_212
+; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
+; EG-NEXT: MOV * T1.W, T3.X,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: v8i16_arg:
; CM: ; %bb.0: ; %entry
-; CM-NEXT: ALU 1, @36, KC0[], KC1[]
-; CM-NEXT: TEX 0 @20
-; CM-NEXT: ALU 5, @38, KC0[], KC1[]
-; CM-NEXT: TEX 0 @22
-; CM-NEXT: ALU 5, @44, KC0[], KC1[]
-; CM-NEXT: TEX 0 @24
-; CM-NEXT: ALU 5, @50, KC0[], KC1[]
-; CM-NEXT: TEX 0 @26
-; CM-NEXT: ALU 5, @56, KC0[], KC1[]
-; CM-NEXT: TEX 0 @28
-; CM-NEXT: ALU 5, @62, KC0[], KC1[]
-; CM-NEXT: TEX 0 @30
-; CM-NEXT: ALU 5, @68, KC0[], KC1[]
-; CM-NEXT: TEX 0 @32
-; CM-NEXT: ALU 5, @74, KC0[], KC1[]
-; CM-NEXT: TEX 0 @34
-; CM-NEXT: ALU 8, @80, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T8.X
+; CM-NEXT: ALU 0, @14, KC0[], KC1[]
+; CM-NEXT: TEX 3 @6
+; CM-NEXT: ALU 4, @15, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
-; CM-NEXT: Fetch clause starting at 20:
-; CM-NEXT: VTX_READ_16 T8.X, T7.X, 66, #3
-; CM-NEXT: Fetch clause starting at 22:
-; CM-NEXT: VTX_READ_16 T8.X, T7.X, 58, #3
-; CM-NEXT: Fetch clause starting at 24:
-; CM-NEXT: VTX_READ_16 T8.X, T7.X, 64, #3
-; CM-NEXT: Fetch clause starting at 26:
-; CM-NEXT: VTX_READ_16 T8.X, T7.X, 56, #3
-; CM-NEXT: Fetch clause starting at 28:
-; CM-NEXT: VTX_READ_16 T8.X, T7.X, 62, #3
-; CM-NEXT: Fetch clause starting at 30:
-; CM-NEXT: VTX_READ_16 T8.X, T7.X, 54, #3
-; CM-NEXT: Fetch clause starting at 32:
-; CM-NEXT: VTX_READ_16 T8.X, T7.X, 60, #3
-; CM-NEXT: Fetch clause starting at 34:
-; CM-NEXT: VTX_READ_16 T7.X, T7.X, 52, #3
-; CM-NEXT: ALU clause starting at 36:
-; CM-NEXT: MOV * T0.Y, T3.X,
-; CM-NEXT: MOV * T7.X, 0.0,
-; CM-NEXT: ALU clause starting at 38:
-; CM-NEXT: LSHL T0.Z, T8.X, literal.x,
-; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
-; CM-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
-; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
-; CM-NEXT: MOV T3.X, PV.W,
-; CM-NEXT: MOV * T0.Y, T5.X,
-; CM-NEXT: ALU clause starting at 44:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: LSHL * T0.W, T8.X, literal.y,
-; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
-; CM-NEXT: MOV T5.X, PV.W,
-; CM-NEXT: MOV * T0.Y, T3.X,
-; CM-NEXT: ALU clause starting at 50:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: AND_INT * T0.W, T8.X, literal.y,
-; CM-NEXT: -65536(nan), 65535(9.183409e-41)
-; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
-; CM-NEXT: MOV T3.X, PV.W,
-; CM-NEXT: MOV * T0.Y, T5.X,
-; CM-NEXT: ALU clause starting at 56:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: AND_INT * T0.W, T8.X, literal.y,
-; CM-NEXT: -65536(nan), 65535(9.183409e-41)
-; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
-; CM-NEXT: MOV T5.X, PV.W,
-; CM-NEXT: MOV * T0.Y, T2.X,
-; CM-NEXT: ALU clause starting at 62:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: LSHL * T0.W, T8.X, literal.y,
-; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
-; CM-NEXT: MOV T2.X, PV.W,
-; CM-NEXT: MOV * T0.Y, T4.X,
-; CM-NEXT: ALU clause starting at 68:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: LSHL * T0.W, T8.X, literal.y,
-; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
-; CM-NEXT: MOV T4.X, PV.W,
-; CM-NEXT: MOV * T0.Y, T2.X,
-; CM-NEXT: ALU clause starting at 74:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: AND_INT * T0.W, T8.X, literal.y,
-; CM-NEXT: -65536(nan), 65535(9.183409e-41)
-; CM-NEXT: OR_INT * T7.Z, PV.Z, PV.W,
-; CM-NEXT: MOV T2.X, PV.Z,
-; CM-NEXT: MOV * T0.Y, T4.X,
-; CM-NEXT: ALU clause starting at 80:
-; CM-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.y,
-; CM-NEXT: AND_INT * T0.W, T7.X, literal.z,
-; CM-NEXT: 2(2.802597e-45), -65536(nan)
-; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; CM-NEXT: OR_INT * T7.X, PV.Z, PV.W,
-; CM-NEXT: MOV T4.X, PV.X,
-; CM-NEXT: MOV * T7.W, T3.X,
-; CM-NEXT: MOV * T7.Y, T5.X,
+; CM-NEXT: Fetch clause starting at 6:
+; CM-NEXT: VTX_READ_16 T1.X, T0.X, 52, #3
+; CM-NEXT: VTX_READ_16 T2.X, T0.X, 54, #3
+; CM-NEXT: VTX_READ_16 T3.X, T0.X, 62, #3
+; CM-NEXT: VTX_READ_16 T0.X, T0.X, 60, #3
+; CM-NEXT: ALU clause starting at 14:
+; CM-NEXT: MOV * T0.X, 0.0,
+; CM-NEXT: ALU clause starting at 15:
+; CM-NEXT: MOV T1.Y, T2.X,
+; CM-NEXT: MOV * T1.Z, T0.X, BS:VEC_120/SCL_212
+; CM-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
+; CM-NEXT: MOV * T1.W, T3.X,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
store <8 x i16> %in, ptr addrspace(1) %out
ret void
@@ -3618,392 +3453,68 @@ define amdgpu_kernel void @v16i16_arg(ptr addrspace(1) %out, <16 x i16> %in) {
;
; EG-LABEL: v16i16_arg:
; EG: ; %bb.0: ; %entry
-; EG-NEXT: ALU 1, @68, KC0[], KC1[]
-; EG-NEXT: TEX 0 @36
-; EG-NEXT: ALU 5, @70, KC0[], KC1[]
-; EG-NEXT: TEX 0 @38
-; EG-NEXT: ALU 5, @76, KC0[], KC1[]
-; EG-NEXT: TEX 0 @40
-; EG-NEXT: ALU 5, @82, KC0[], KC1[]
-; EG-NEXT: TEX 0 @42
-; EG-NEXT: ALU 5, @88, KC0[], KC1[]
-; EG-NEXT: TEX 0 @44
-; EG-NEXT: ALU 5, @94, KC0[], KC1[]
-; EG-NEXT: TEX 0 @46
-; EG-NEXT: ALU 5, @100, KC0[], KC1[]
-; EG-NEXT: TEX 0 @48
-; EG-NEXT: ALU 5, @106, KC0[], KC1[]
-; EG-NEXT: TEX 0 @50
-; EG-NEXT: ALU 5, @112, KC0[], KC1[]
-; EG-NEXT: TEX 0 @52
-; EG-NEXT: ALU 5, @118, KC0[], KC1[]
-; EG-NEXT: TEX 0 @54
-; EG-NEXT: ALU 5, @124, KC0[], KC1[]
-; EG-NEXT: TEX 0 @56
-; EG-NEXT: ALU 5, @130, KC0[], KC1[]
-; EG-NEXT: TEX 0 @58
-; EG-NEXT: ALU 5, @136, KC0[], KC1[]
-; EG-NEXT: TEX 0 @60
-; EG-NEXT: ALU 5, @142, KC0[], KC1[]
-; EG-NEXT: TEX 0 @62
-; EG-NEXT: ALU 5, @148, KC0[], KC1[]
-; EG-NEXT: TEX 0 @64
-; EG-NEXT: ALU 5, @154, KC0[], KC1[]
-; EG-NEXT: TEX 0 @66
-; EG-NEXT: ALU 13, @160, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T14.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T13.X, 1
+; EG-NEXT: ALU 0, @22, KC0[], KC1[]
+; EG-NEXT: TEX 7 @6
+; EG-NEXT: ALU 10, @23, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T0.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 36:
-; EG-NEXT: VTX_READ_16 T12.X, T11.X, 98, #3
-; EG-NEXT: Fetch clause starting at 38:
-; EG-NEXT: VTX_READ_16 T12.X, T11.X, 90, #3
-; EG-NEXT: Fetch clause starting at 40:
-; EG-NEXT: VTX_READ_16 T12.X, T11.X, 82, #3
-; EG-NEXT: Fetch clause starting at 42:
-; EG-NEXT: VTX_READ_16 T12.X, T11.X, 74, #3
-; EG-NEXT: Fetch clause starting at 44:
-; EG-NEXT: VTX_READ_16 T12.X, T11.X, 96, #3
-; EG-NEXT: Fetch clause starting at 46:
-; EG-NEXT: VTX_READ_16 T12.X, T11.X, 88, #3
-; EG-NEXT: Fetch clause starting at 48:
-; EG-NEXT: VTX_READ_16 T12.X, T11.X, 80, #3
-; EG-NEXT: Fetch clause starting at 50:
-; EG-NEXT: VTX_READ_16 T12.X, T11.X, 72, #3
-; EG-NEXT: Fetch clause starting at 52:
-; EG-NEXT: VTX_READ_16 T12.X, T11.X, 94, #3
-; EG-NEXT: Fetch clause starting at 54:
-; EG-NEXT: VTX_READ_16 T12.X, T11.X, 86, #3
-; EG-NEXT: Fetch clause starting at 56:
-; EG-NEXT: VTX_READ_16 T12.X, T11.X, 78, #3
-; EG-NEXT: Fetch clause starting at 58:
-; EG-NEXT: VTX_READ_16 T12.X, T11.X, 70, #3
-; EG-NEXT: Fetch clause starting at 60:
-; EG-NEXT: VTX_READ_16 T12.X, T11.X, 92, #3
-; EG-NEXT: Fetch clause starting at 62:
-; EG-NEXT: VTX_READ_16 T12.X, T11.X, 84, #3
-; EG-NEXT: Fetch clause starting at 64:
-; EG-NEXT: VTX_READ_16 T13.X, T11.X, 76, #3
-; EG-NEXT: Fetch clause starting at 66:
-; EG-NEXT: VTX_READ_16 T11.X, T11.X, 68, #3
-; EG-NEXT: ALU clause starting at 68:
-; EG-NEXT: MOV * T0.Y, T3.X,
-; EG-NEXT: MOV * T11.X, 0.0,
-; EG-NEXT: ALU clause starting at 70:
-; EG-NEXT: LSHL T0.W, T12.X, literal.x,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
-; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV T3.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T5.X,
-; EG-NEXT: ALU clause starting at 76:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T5.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T7.X,
-; EG-NEXT: ALU clause starting at 82:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T7.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T9.X,
-; EG-NEXT: ALU clause starting at 88:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T9.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T3.X,
-; EG-NEXT: ALU clause starting at 94:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T3.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T5.X,
-; EG-NEXT: ALU clause starting at 100:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T5.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T7.X,
-; EG-NEXT: ALU clause starting at 106:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T7.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T9.X,
-; EG-NEXT: ALU clause starting at 112:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T9.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T2.X,
-; EG-NEXT: ALU clause starting at 118:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T2.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T4.X,
-; EG-NEXT: ALU clause starting at 124:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T4.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T6.X,
-; EG-NEXT: ALU clause starting at 130:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T6.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T8.X,
-; EG-NEXT: ALU clause starting at 136:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T8.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T2.X,
-; EG-NEXT: ALU clause starting at 142:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T12.Z, PV.W, PS,
-; EG-NEXT: MOV T2.X, PV.Z,
-; EG-NEXT: MOV * T0.Y, T4.X,
-; EG-NEXT: ALU clause starting at 148:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T12.X, PV.W, PS,
-; EG-NEXT: MOV T4.X, PV.X,
-; EG-NEXT: MOV * T0.Y, T6.X,
-; EG-NEXT: ALU clause starting at 154:
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, T13.X, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T11.Z, PV.W, PS,
-; EG-NEXT: MOV T6.X, PV.Z,
-; EG-NEXT: MOV * T0.Y, T8.X,
-; EG-NEXT: ALU clause starting at 160:
-; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x,
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_16 T1.X, T0.X, 84, #3
+; EG-NEXT: VTX_READ_16 T2.X, T0.X, 86, #3
+; EG-NEXT: VTX_READ_16 T3.X, T0.X, 94, #3
+; EG-NEXT: VTX_READ_16 T4.X, T0.X, 78, #3
+; EG-NEXT: VTX_READ_16 T5.X, T0.X, 76, #3
+; EG-NEXT: VTX_READ_16 T6.X, T0.X, 92, #3
+; EG-NEXT: VTX_READ_16 T7.X, T0.X, 68, #3
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 70, #3
+; EG-NEXT: ALU clause starting at 22:
+; EG-NEXT: MOV * T0.X, 0.0,
+; EG-NEXT: ALU clause starting at 23:
+; EG-NEXT: MOV T1.Y, T2.X,
+; EG-NEXT: MOV * T7.Y, T0.X,
+; EG-NEXT: MOV * T1.Z, T6.X,
+; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
+; EG-NEXT: MOV T7.Z, T5.X,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
-; EG-NEXT: LSHR T14.X, PV.W, literal.x,
-; EG-NEXT: AND_INT T0.W, T0.Y, literal.y,
-; EG-NEXT: AND_INT * T1.W, T11.X, literal.z,
-; EG-NEXT: 2(2.802597e-45), -65536(nan)
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T11.X, PV.W, PS,
-; EG-NEXT: MOV T8.X, PV.X,
-; EG-NEXT: MOV * T12.W, T3.X,
-; EG-NEXT: MOV T12.Y, T5.X,
-; EG-NEXT: MOV T11.W, T7.X, BS:VEC_120/SCL_212
-; EG-NEXT: MOV * T11.Y, T9.X,
+; EG-NEXT: LSHR T2.X, PV.W, literal.x,
+; EG-NEXT: MOV T7.W, T4.X,
+; EG-NEXT: MOV * T1.W, T3.X,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: v16i16_arg:
; CM: ; %bb.0: ; %entry
-; CM-NEXT: ALU 1, @68, KC0[], KC1[]
-; CM-NEXT: TEX 0 @36
-; CM-NEXT: ALU 5, @70, KC0[], KC1[]
-; CM-NEXT: TEX 0 @38
-; CM-NEXT: ALU 5, @76, KC0[], KC1[]
-; CM-NEXT: TEX 0 @40
-; CM-NEXT: ALU 5, @82, KC0[], KC1[]
-; CM-NEXT: TEX 0 @42
-; CM-NEXT: ALU 5, @88, KC0[], KC1[]
-; CM-NEXT: TEX 0 @44
-; CM-NEXT: ALU 5, @94, KC0[], KC1[]
-; CM-NEXT: TEX 0 @46
-; CM-NEXT: ALU 5, @100, KC0[], KC1[]
-; CM-NEXT: TEX 0 @48
-; CM-NEXT: ALU 5, @106, KC0[], KC1[]
-; CM-NEXT: TEX 0 @50
-; CM-NEXT: ALU 5, @112, KC0[], KC1[]
-; CM-NEXT: TEX 0 @52
-; CM-NEXT: ALU 5, @118, KC0[], KC1[]
-; CM-NEXT: TEX 0 @54
-; CM-NEXT: ALU 5, @124, KC0[], KC1[]
-; CM-NEXT: TEX 0 @56
-; CM-NEXT: ALU 5, @130, KC0[], KC1[]
-; CM-NEXT: TEX 0 @58
-; CM-NEXT: ALU 5, @136, KC0[], KC1[]
-; CM-NEXT: TEX 0 @60
-; CM-NEXT: ALU 5, @142, KC0[], KC1[]
-; CM-NEXT: TEX 0 @62
-; CM-NEXT: ALU 5, @148, KC0[], KC1[]
-; CM-NEXT: TEX 0 @64
-; CM-NEXT: ALU 5, @154, KC0[], KC1[]
-; CM-NEXT: TEX 0 @66
-; CM-NEXT: ALU 14, @160, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T11, T14.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T13.X
+; CM-NEXT: ALU 0, @22, KC0[], KC1[]
+; CM-NEXT: TEX 7 @6
+; CM-NEXT: ALU 11, @23, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T2.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
; CM-NEXT: CF_END
-; CM-NEXT: Fetch clause starting at 36:
-; CM-NEXT: VTX_READ_16 T12.X, T11.X, 98, #3
-; CM-NEXT: Fetch clause starting at 38:
-; CM-NEXT: VTX_READ_16 T12.X, T11.X, 90, #3
-; CM-NEXT: Fetch clause starting at 40:
-; CM-NEXT: VTX_READ_16 T12.X, T11.X, 82, #3
-; CM-NEXT: Fetch clause starting at 42:
-; CM-NEXT: VTX_READ_16 T12.X, T11.X, 74, #3
-; CM-NEXT: Fetch clause starting at 44:
-; CM-NEXT: VTX_READ_16 T12.X, T11.X, 96, #3
-; CM-NEXT: Fetch clause starting at 46:
-; CM-NEXT: VTX_READ_16 T12.X, T11.X, 88, #3
-; CM-NEXT: Fetch clause starting at 48:
-; CM-NEXT: VTX_READ_16 T12.X, T11.X, 80, #3
-; CM-NEXT: Fetch clause starting at 50:
-; CM-NEXT: VTX_READ_16 T12.X, T11.X, 72, #3
-; CM-NEXT: Fetch clause starting at 52:
-; CM-NEXT: VTX_READ_16 T12.X, T11.X, 94, #3
-; CM-NEXT: Fetch clause starting at 54:
-; CM-NEXT: VTX_READ_16 T12.X, T11.X, 86, #3
-; CM-NEXT: Fetch clause starting at 56:
-; CM-NEXT: VTX_READ_16 T12.X, T11.X, 78, #3
-; CM-NEXT: Fetch clause starting at 58:
-; CM-NEXT: VTX_READ_16 T12.X, T11.X, 70, #3
-; CM-NEXT: Fetch clause starting at 60:
-; CM-NEXT: VTX_READ_16 T12.X, T11.X, 92, #3
-; CM-NEXT: Fetch clause starting at 62:
-; CM-NEXT: VTX_READ_16 T12.X, T11.X, 84, #3
-; CM-NEXT: Fetch clause starting at 64:
-; CM-NEXT: VTX_READ_16 T13.X, T11.X, 76, #3
-; CM-NEXT: Fetch clause starting at 66:
-; CM-NEXT: VTX_READ_16 T11.X, T11.X, 68, #3
-; CM-NEXT: ALU clause starting at 68:
-; CM-NEXT: MOV * T0.Y, T3.X,
-; CM-NEXT: MOV * T11.X, 0.0,
-; CM-NEXT: ALU clause starting at 70:
-; CM-NEXT: LSHL T0.Z, T12.X, literal.x,
-; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
-; CM-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
-; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
-; CM-NEXT: MOV T3.X, PV.W,
-; CM-NEXT: MOV * T0.Y, T5.X,
-; CM-NEXT: ALU clause starting at 76:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
-; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
-; CM-NEXT: MOV T5.X, PV.W,
-; CM-NEXT: MOV * T0.Y, T7.X,
-; CM-NEXT: ALU clause starting at 82:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
-; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
-; CM-NEXT: MOV T7.X, PV.W,
-; CM-NEXT: MOV * T0.Y, T9.X,
-; CM-NEXT: ALU clause starting at 88:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
-; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
-; CM-NEXT: MOV T9.X, PV.W,
-; CM-NEXT: MOV * T0.Y, T3.X,
-; CM-NEXT: ALU clause starting at 94:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
-; CM-NEXT: -65536(nan), 65535(9.183409e-41)
-; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
-; CM-NEXT: MOV T3.X, PV.W,
-; CM-NEXT: MOV * T0.Y, T5.X,
-; CM-NEXT: ALU clause starting at 100:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
-; CM-NEXT: -65536(nan), 65535(9.183409e-41)
-; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
-; CM-NEXT: MOV T5.X, PV.W,
-; CM-NEXT: MOV * T0.Y, T7.X,
-; CM-NEXT: ALU clause starting at 106:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
-; CM-NEXT: -65536(nan), 65535(9.183409e-41)
-; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
-; CM-NEXT: MOV T7.X, PV.W,
-; CM-NEXT: MOV * T0.Y, T9.X,
-; CM-NEXT: ALU clause starting at 112:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
-; CM-NEXT: -65536(nan), 65535(9.183409e-41)
-; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
-; CM-NEXT: MOV T9.X, PV.W,
-; CM-NEXT: MOV * T0.Y, T2.X,
-; CM-NEXT: ALU clause starting at 118:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
-; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
-; CM-NEXT: MOV T2.X, PV.W,
-; CM-NEXT: MOV * T0.Y, T4.X,
-; CM-NEXT: ALU clause starting at 124:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
-; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
-; CM-NEXT: MOV T4.X, PV.W,
-; CM-NEXT: MOV * T0.Y, T6.X,
-; CM-NEXT: ALU clause starting at 130:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
-; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
-; CM-NEXT: MOV T6.X, PV.W,
-; CM-NEXT: MOV * T0.Y, T8.X,
-; CM-NEXT: ALU clause starting at 136:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
-; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
-; CM-NEXT: MOV T8.X, PV.W,
-; CM-NEXT: MOV * T0.Y, T2.X,
-; CM-NEXT: ALU clause starting at 142:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
-; CM-NEXT: -65536(nan), 65535(9.183409e-41)
-; CM-NEXT: OR_INT * T12.Z, PV.Z, PV.W,
-; CM-NEXT: MOV T2.X, PV.Z,
-; CM-NEXT: MOV * T0.Y, T4.X,
-; CM-NEXT: ALU clause starting at 148:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
-; CM-NEXT: -65536(nan), 65535(9.183409e-41)
-; CM-NEXT: OR_INT * T12.X, PV.Z, PV.W,
-; CM-NEXT: MOV T4.X, PV.X,
-; CM-NEXT: MOV * T0.Y, T6.X,
-; CM-NEXT: ALU clause starting at 154:
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
-; CM-NEXT: AND_INT * T0.W, T13.X, literal.y,
-; CM-NEXT: -65536(nan), 65535(9.183409e-41)
-; CM-NEXT: OR_INT * T11.Z, PV.Z, PV.W,
-; CM-NEXT: MOV T6.X, PV.Z,
-; CM-NEXT: MOV * T0.Y, T8.X,
-; CM-NEXT: ALU clause starting at 160:
+; CM-NEXT: Fetch clause starting at 6:
+; CM-NEXT: VTX_READ_16 T1.X, T0.X, 84, #3
+; CM-NEXT: VTX_READ_16 T2.X, T0.X, 86, #3
+; CM-NEXT: VTX_READ_16 T3.X, T0.X, 78, #3
+; CM-NEXT: VTX_READ_16 T4.X, T0.X, 94, #3
+; CM-NEXT: VTX_READ_16 T5.X, T0.X, 76, #3
+; CM-NEXT: VTX_READ_16 T6.X, T0.X, 92, #3
+; CM-NEXT: VTX_READ_16 T7.X, T0.X, 68, #3
+; CM-NEXT: VTX_READ_16 T0.X, T0.X, 70, #3
+; CM-NEXT: ALU clause starting at 22:
+; CM-NEXT: MOV * T0.X, 0.0,
+; CM-NEXT: ALU clause starting at 23:
+; CM-NEXT: MOV * T1.Y, T2.X,
+; CM-NEXT: MOV T7.Y, T0.X,
+; CM-NEXT: MOV T1.Z, T6.X, BS:VEC_120/SCL_212
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR * T13.X, PV.W, literal.x,
+; CM-NEXT: LSHR T0.X, PV.W, literal.x,
+; CM-NEXT: MOV T7.Z, T5.X,
+; CM-NEXT: MOV * T1.W, T4.X, BS:VEC_120/SCL_212
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: LSHR T2.X, KC0[2].Y, literal.x,
+; CM-NEXT: MOV * T7.W, T3.X,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: LSHR T14.X, KC0[2].Y, literal.x,
-; CM-NEXT: AND_INT T0.Z, T0.Y, literal.y,
-; CM-NEXT: AND_INT * T0.W, T11.X, literal.z,
-; CM-NEXT: 2(2.802597e-45), -65536(nan)
-; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; CM-NEXT: OR_INT * T11.X, PV.Z, PV.W,
-; CM-NEXT: MOV T8.X, PV.X,
-; CM-NEXT: MOV * T12.W, T3.X,
-; CM-NEXT: MOV T12.Y, T5.X,
-; CM-NEXT: MOV * T11.W, T7.X, BS:VEC_120/SCL_212
-; CM-NEXT: MOV * T11.Y, T9.X,
entry:
store <16 x i16> %in, ptr addrspace(1) %out
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
index 2afac4e90aa4072..458afa4d6aad220 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
@@ -212,38 +212,32 @@ define amdgpu_kernel void @constant_load_v3i16(ptr addrspace(1) %out, ptr addrsp
;
; EG-LABEL: constant_load_v3i16:
; EG: ; %bb.0: ; %entry
-; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 2 @6
-; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
-; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X
+; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @6
+; EG-NEXT: ALU 14, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T3.X, 0
+; EG-NEXT: MEM_RAT MSKOR T2.XW, T0.X
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1
-; EG-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1
-; EG-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1
-; EG-NEXT: ALU clause starting at 12:
-; EG-NEXT: MOV * T5.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 13:
+; EG-NEXT: VTX_READ_16 T1.X, T0.X, 0, #1
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 4, #1
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 11:
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T2.W, T5.X, literal.y,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
-; EG-NEXT: LSHL T5.X, T2.W, PV.W,
-; EG-NEXT: LSHL * T5.W, literal.x, PV.W,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MOV T5.Y, 0.0,
-; EG-NEXT: MOV * T5.Z, 0.0,
-; EG-NEXT: LSHR T8.X, T0.W, literal.x,
-; EG-NEXT: LSHL T0.W, T7.X, literal.y,
-; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
-; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT: LSHL T2.X, T2.W, PV.W,
+; EG-NEXT: LSHL * T2.W, literal.x, PV.W,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT T6.X, PV.W, PS,
-; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
+; EG-NEXT: MOV T2.Y, 0.0,
+; EG-NEXT: MOV * T2.Z, 0.0,
+; EG-NEXT: LSHR T0.X, T0.W, literal.x,
+; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v3i16:
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
index b945c7c3def6adf..c608bef3f726ed6 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
@@ -9491,50 +9491,24 @@ define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(ptr addrspace(1) %out
;
; EG-LABEL: constant_zextload_v4i8_to_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 31, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XY, T7.X, 1
+; EG-NEXT: ALU 6, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_32 T7.X, T7.X, 0, #1
+; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T0.Y, T4.X,
-; EG-NEXT: MOV * T7.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 10:
-; EG-NEXT: AND_INT T0.W, T7.X, literal.x,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
-; EG-NEXT: 255(3.573311e-43), -65536(nan)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV * T4.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHL * T0.W, T7.X, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T4.X, PV.W,
-; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: MOV * T4.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T0.W, T7.X, literal.x, PV.W,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 16(2.242078e-44), -65536(nan)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV * T5.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
+; EG-NEXT: BFE_UINT * T4.Y, T4.X, literal.x, PV.W,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
-; EG-NEXT: OR_INT * T8.Y, PV.W, PS,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: MOV T5.X, PV.Y,
-; EG-NEXT: MOV * T8.X, T4.X,
+; EG-NEXT: AND_INT T4.X, T4.X, literal.x,
+; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y,
+; EG-NEXT: 255(3.573311e-43), 2(2.802597e-45)
;
; GFX12-LABEL: constant_zextload_v4i8_to_v4i16:
; GFX12: ; %bb.0:
@@ -9633,56 +9607,23 @@ define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(ptr addrspace(1) %out
;
; EG-LABEL: constant_sextload_v4i8_to_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 37, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XY, T7.X, 1
+; EG-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_32 T7.X, T7.X, 0, #1
+; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T0.Y, T4.X,
-; EG-NEXT: MOV * T7.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 10:
-; EG-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
-; EG-NEXT: 65535(9.183409e-41), -65536(nan)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV * T4.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
+; EG-NEXT: MOV * T4.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x,
+; EG-NEXT: LSHR T0.W, T4.X, literal.x,
+; EG-NEXT: LSHR * T4.X, KC0[2].Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 2(2.802597e-45)
+; EG-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV T4.X, PV.W,
-; EG-NEXT: MOV T0.Y, T5.X,
-; EG-NEXT: LSHR * T0.W, T7.X, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), -65536(nan)
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T5.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: ASHR * T0.W, T7.X, literal.x,
-; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
-; EG-NEXT: OR_INT * T8.Y, PV.W, PS,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: MOV T5.X, PV.Y,
-; EG-NEXT: MOV * T8.X, T4.X,
;
; GFX12-LABEL: constant_sextload_v4i8_to_v4i16:
; GFX12: ; %bb.0:
@@ -9800,80 +9741,27 @@ define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(ptr addrspace(1) %out
;
; EG-LABEL: constant_zextload_v8i8_to_v8i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 61, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T11.X, 1
+; EG-NEXT: ALU 9, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_64 T11.XY, T11.X, 0, #1
+; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T0.Y, T8.X,
-; EG-NEXT: MOV * T11.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 10:
-; EG-NEXT: AND_INT T0.W, T11.X, literal.x,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
-; EG-NEXT: 255(3.573311e-43), -65536(nan)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV * T8.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHL * T0.W, T11.X, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T8.X, PV.W,
-; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: MOV * T5.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T1.W, T11.X, literal.x, PV.W,
-; EG-NEXT: AND_INT * T2.W, PV.Y, literal.y,
-; EG-NEXT: 16(2.242078e-44), -65536(nan)
-; EG-NEXT: OR_INT * T1.W, PS, PV.W,
-; EG-NEXT: MOV * T9.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T1.W, T11.X, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T12.Y, PV.W, PS,
-; EG-NEXT: MOV T9.X, PV.Y,
-; EG-NEXT: MOV * T0.Y, T4.X,
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T2.W, T11.Y, literal.y,
-; EG-NEXT: -65536(nan), 255(3.573311e-43)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV * T4.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHL * T1.W, T11.Y, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV T4.X, PV.W,
-; EG-NEXT: MOV T0.Y, T5.X,
-; EG-NEXT: BFE_UINT * T0.W, T11.Y, literal.x, T0.W,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, PV.W, T0.W,
-; EG-NEXT: MOV * T5.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT: BFE_UINT * T6.W, T5.Y, literal.x, PV.W,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
-; EG-NEXT: OR_INT * T12.W, PV.W, PS,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: MOV T5.X, PV.W,
-; EG-NEXT: MOV * T12.X, T8.X,
-; EG-NEXT: MOV * T12.Z, T4.X,
+; EG-NEXT: BFE_UINT T6.Y, T5.X, literal.x, T0.W,
+; EG-NEXT: AND_INT * T6.Z, T5.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 255(3.573311e-43)
+; EG-NEXT: AND_INT T6.X, T5.X, literal.x,
+; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y,
+; EG-NEXT: 255(3.573311e-43), 2(2.802597e-45)
;
; GFX12-LABEL: constant_zextload_v8i8_to_v8i16:
; GFX12: ; %bb.0:
@@ -10017,93 +9905,28 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(ptr addrspace(1) %out
;
; EG-LABEL: constant_sextload_v8i8_to_v8i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 74, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T11.X, 1
+; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_64 T11.XY, T11.X, 0, #1
+; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T0.Y, T8.X,
-; EG-NEXT: MOV * T11.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 10:
-; EG-NEXT: BFE_INT * T0.W, T11.X, 0.0, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
-; EG-NEXT: 65535(9.183409e-41), -65536(nan)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV * T8.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T0.W, T11.X, literal.x,
+; EG-NEXT: MOV * T5.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: BFE_INT * T6.Z, T5.Y, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV T8.X, PV.W,
-; EG-NEXT: MOV T0.Y, T9.X,
-; EG-NEXT: LSHR * T0.W, T11.X, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), -65536(nan)
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T9.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: ASHR * T0.W, T11.X, literal.x,
-; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: OR_INT * T12.Y, PV.W, PS,
-; EG-NEXT: MOV T9.X, PV.Y,
-; EG-NEXT: MOV T0.Y, T4.X,
-; EG-NEXT: BFE_INT * T0.W, T11.Y, 0.0, literal.x,
+; EG-NEXT: BFE_INT T6.X, T5.X, 0.0, literal.x,
+; EG-NEXT: LSHR * T0.W, T5.Y, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV * T4.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT: BFE_INT T6.W, PV.W, 0.0, literal.x,
+; EG-NEXT: LSHR * T0.W, T5.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV T4.X, PV.W,
-; EG-NEXT: MOV T0.Y, T5.X,
-; EG-NEXT: LSHR * T0.W, T11.Y, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), -65536(nan)
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T5.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: ASHR * T0.W, T11.Y, literal.x,
-; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
-; EG-NEXT: OR_INT * T12.W, PV.W, PS,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: MOV T5.X, PV.W,
-; EG-NEXT: MOV * T12.X, T8.X,
-; EG-NEXT: MOV * T12.Z, T4.X,
+; EG-NEXT: LSHR T5.X, KC0[2].Y, literal.x,
+; EG-NEXT: BFE_INT * T6.Y, PS, 0.0, literal.y,
+; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
;
; GFX12-LABEL: constant_sextload_v8i8_to_v8i16:
; GFX12: ; %bb.0:
@@ -10296,146 +10119,37 @@ define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(ptr addrspace(1) %o
;
; EG-LABEL: constant_zextload_v16i8_to_v16i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 0 @8
-; EG-NEXT: ALU 103, @12, KC0[], KC1[]
-; EG-NEXT: ALU 20, @116, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T22.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T21.X, 1
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: PAD
-; EG-NEXT: Fetch clause starting at 8:
-; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 0, #1
-; EG-NEXT: ALU clause starting at 10:
-; EG-NEXT: MOV * T0.Y, T16.X,
-; EG-NEXT: MOV * T19.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 12:
-; EG-NEXT: AND_INT T0.W, T19.X, literal.x,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
-; EG-NEXT: 255(3.573311e-43), -65536(nan)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV * T16.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHL * T0.W, T19.X, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T16.X, PV.W,
-; EG-NEXT: MOV T0.Y, T17.X,
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T7.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T1.W, T19.X, literal.x, PV.W,
-; EG-NEXT: AND_INT * T2.W, PV.Y, literal.y,
-; EG-NEXT: 16(2.242078e-44), -65536(nan)
-; EG-NEXT: OR_INT * T1.W, PS, PV.W,
-; EG-NEXT: MOV * T17.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T1.W, T19.X, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T20.Y, PV.W, PS,
-; EG-NEXT: MOV T17.X, PV.Y,
-; EG-NEXT: MOV * T0.Y, T12.X,
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T2.W, T19.Y, literal.y,
-; EG-NEXT: -65536(nan), 255(3.573311e-43)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV * T12.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHL * T1.W, T19.Y, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV T12.X, PV.W,
-; EG-NEXT: MOV T0.Y, T13.X,
-; EG-NEXT: BFE_UINT * T1.W, T19.Y, literal.x, T0.W,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
-; EG-NEXT: MOV * T13.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T1.W, T19.Y, literal.x,
+; EG-NEXT: BFE_UINT * T8.W, T7.Y, literal.x, PV.W,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T20.W, PV.W, PS,
-; EG-NEXT: MOV T13.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T8.X,
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T2.W, T19.Z, literal.y,
-; EG-NEXT: -65536(nan), 255(3.573311e-43)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV * T8.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHL * T1.W, T19.Z, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV T8.X, PV.W,
-; EG-NEXT: MOV T0.Y, T9.X,
-; EG-NEXT: BFE_UINT * T1.W, T19.Z, literal.x, T0.W,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
-; EG-NEXT: MOV * T9.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T1.W, T19.Z, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T19.Y, PV.W, PS,
-; EG-NEXT: MOV T9.X, PV.Y,
-; EG-NEXT: MOV * T0.Y, T4.X,
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T2.W, T19.W, literal.y,
-; EG-NEXT: -65536(nan), 255(3.573311e-43)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV * T4.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHL * T1.W, T19.W, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV T4.X, PV.W,
-; EG-NEXT: MOV T0.Y, T5.X,
-; EG-NEXT: BFE_UINT * T0.W, T19.W, literal.x, T0.W,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: ALU clause starting at 116:
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, PV.W, T0.W,
-; EG-NEXT: MOV * T5.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR T0.W, T19.W, literal.x,
-; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44)
-; EG-NEXT: LSHR T21.X, PS, literal.x,
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.y,
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.z,
-; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT: 16711680(2.341805e-38), 0(0.000000e+00)
-; EG-NEXT: LSHR T22.X, KC0[2].Y, literal.x,
-; EG-NEXT: OR_INT * T19.W, PV.W, PS,
+; EG-NEXT: BFE_UINT T8.Y, T7.X, literal.x, T0.W,
+; EG-NEXT: AND_INT T8.Z, T7.Y, literal.y,
+; EG-NEXT: BFE_UINT * T9.W, T7.W, literal.x, T0.W,
+; EG-NEXT: 8(1.121039e-44), 255(3.573311e-43)
+; EG-NEXT: AND_INT T8.X, T7.X, literal.x,
+; EG-NEXT: BFE_UINT T9.Y, T7.Z, literal.y, T0.W,
+; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.z,
+; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T9.Z, T7.W, literal.x,
+; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
+; EG-NEXT: AND_INT T9.X, T7.Z, literal.x,
+; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT: 255(3.573311e-43), 16(2.242078e-44)
+; EG-NEXT: LSHR * T10.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: MOV T5.X, PV.W,
-; EG-NEXT: MOV * T20.X, T16.X,
-; EG-NEXT: MOV * T20.Z, T12.X,
-; EG-NEXT: MOV T19.X, T8.X,
-; EG-NEXT: MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
;
; GFX12-LABEL: constant_zextload_v16i8_to_v16i16:
; GFX12: ; %bb.0:
@@ -10683,173 +10397,38 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(ptr addrspace(1) %o
;
; EG-LABEL: constant_sextload_v16i8_to_v16i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 0 @8
-; EG-NEXT: ALU 104, @12, KC0[], KC1[]
-; EG-NEXT: ALU 46, @117, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T22.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T21.X, 1
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 20, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: PAD
-; EG-NEXT: Fetch clause starting at 8:
-; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 0, #1
-; EG-NEXT: ALU clause starting at 10:
-; EG-NEXT: MOV * T0.Y, T16.X,
-; EG-NEXT: MOV * T19.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 12:
-; EG-NEXT: BFE_INT * T0.W, T19.X, 0.0, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
-; EG-NEXT: 65535(9.183409e-41), -65536(nan)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV * T16.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T0.W, T19.X, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV T16.X, PV.W,
-; EG-NEXT: MOV T0.Y, T17.X,
-; EG-NEXT: LSHR * T0.W, T19.X, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), -65536(nan)
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T17.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: ASHR * T0.W, T19.X, literal.x,
-; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: OR_INT * T20.Y, PV.W, PS,
-; EG-NEXT: MOV T17.X, PV.Y,
-; EG-NEXT: MOV T0.Y, T12.X,
-; EG-NEXT: BFE_INT * T0.W, T19.Y, 0.0, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV * T12.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T0.W, T19.Y, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV T12.X, PV.W,
-; EG-NEXT: MOV T0.Y, T13.X,
-; EG-NEXT: LSHR * T0.W, T19.Y, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), -65536(nan)
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T13.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: ASHR * T0.W, T19.Y, literal.x,
-; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: OR_INT * T20.W, PV.W, PS,
-; EG-NEXT: MOV T13.X, PV.W,
-; EG-NEXT: MOV T0.Y, T8.X,
-; EG-NEXT: BFE_INT * T0.W, T19.Z, 0.0, literal.x,
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T7.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: BFE_INT * T8.Z, T7.Y, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV * T8.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T0.W, T19.Z, literal.x,
+; EG-NEXT: BFE_INT T8.X, T7.X, 0.0, literal.x,
+; EG-NEXT: BFE_INT T9.Z, T7.W, 0.0, literal.x,
+; EG-NEXT: LSHR * T0.W, T7.Y, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV T8.X, PV.W,
-; EG-NEXT: MOV T0.Y, T9.X,
-; EG-NEXT: LSHR * T0.W, T19.Z, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), -65536(nan)
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T9.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: ASHR * T0.W, T19.Z, literal.x,
-; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: ALU clause starting at 117:
-; EG-NEXT: OR_INT * T19.Y, T1.W, T0.W,
-; EG-NEXT: MOV T9.X, PV.Y,
-; EG-NEXT: MOV T0.Y, T4.X,
-; EG-NEXT: BFE_INT * T0.W, T19.W, 0.0, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV * T4.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T0.W, T19.W, literal.x,
+; EG-NEXT: BFE_INT T9.X, T7.Z, 0.0, literal.x,
+; EG-NEXT: LSHR T0.Z, T7.W, literal.x,
+; EG-NEXT: BFE_INT T8.W, PV.W, 0.0, literal.x,
+; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV T4.X, PV.W,
-; EG-NEXT: MOV T0.Y, T5.X,
-; EG-NEXT: LSHR * T0.W, T19.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), -65536(nan)
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T5.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: ASHR T0.W, T19.W, literal.x,
-; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT: 24(3.363116e-44), 16(2.242078e-44)
-; EG-NEXT: LSHR T21.X, PS, literal.x,
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.y,
-; EG-NEXT: LSHL * T0.W, PV.W, literal.z,
-; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
+; EG-NEXT: BFE_INT T8.Y, PS, 0.0, literal.y,
+; EG-NEXT: LSHR T1.Z, T7.Z, literal.y,
+; EG-NEXT: BFE_INT T9.W, PV.Z, 0.0, literal.y,
+; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
+; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T22.X, KC0[2].Y, literal.x,
-; EG-NEXT: OR_INT * T19.W, PV.W, PS,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: MOV T5.X, PV.W,
-; EG-NEXT: MOV * T20.X, T16.X,
-; EG-NEXT: MOV * T20.Z, T12.X,
-; EG-NEXT: MOV T19.X, T8.X,
-; EG-NEXT: MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
+; EG-NEXT: LSHR T10.X, PS, literal.x,
+; EG-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
+; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
;
; GFX12-LABEL: constant_sextload_v16i8_to_v16i16:
; GFX12: ; %bb.0:
@@ -11194,276 +10773,58 @@ define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(ptr addrspace(1) %o
;
; EG-LABEL: constant_zextload_v32i8_to_v32i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 1, @14, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 1 @10
-; EG-NEXT: ALU 103, @16, KC0[], KC1[]
-; EG-NEXT: ALU 104, @120, KC0[], KC1[]
-; EG-NEXT: ALU 41, @225, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T42.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 1
+; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @8
+; EG-NEXT: ALU 37, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T12.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 10:
-; EG-NEXT: VTX_READ_128 T37.XYZW, T35.X, 16, #1
-; EG-NEXT: VTX_READ_128 T35.XYZW, T35.X, 0, #1
-; EG-NEXT: ALU clause starting at 14:
-; EG-NEXT: MOV * T0.Y, T16.X,
-; EG-NEXT: MOV * T35.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 16:
-; EG-NEXT: AND_INT T0.W, T37.X, literal.x,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
-; EG-NEXT: 255(3.573311e-43), -65536(nan)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV * T16.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHL * T0.W, T37.X, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T16.X, PV.W,
-; EG-NEXT: MOV T0.Y, T17.X,
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
+; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: MOV * T11.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 13:
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T1.W, T37.X, literal.x, PV.W,
-; EG-NEXT: AND_INT * T2.W, PV.Y, literal.y,
-; EG-NEXT: 16(2.242078e-44), -65536(nan)
-; EG-NEXT: OR_INT * T1.W, PS, PV.W,
-; EG-NEXT: MOV * T17.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T1.W, T37.X, literal.x,
+; EG-NEXT: BFE_UINT * T13.W, T11.Y, literal.x, PV.W,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T36.Y, PV.W, PS,
-; EG-NEXT: MOV T17.X, PV.Y,
-; EG-NEXT: MOV * T0.Y, T12.X,
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T2.W, T37.Y, literal.y,
-; EG-NEXT: -65536(nan), 255(3.573311e-43)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV * T12.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHL * T1.W, T37.Y, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV T12.X, PV.W,
-; EG-NEXT: MOV T0.Y, T13.X,
-; EG-NEXT: BFE_UINT * T1.W, T37.Y, literal.x, T0.W,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
-; EG-NEXT: MOV * T13.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T1.W, T37.Y, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T36.W, PV.W, PS,
-; EG-NEXT: MOV T13.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T8.X,
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T2.W, T37.Z, literal.y,
-; EG-NEXT: -65536(nan), 255(3.573311e-43)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV * T8.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHL * T1.W, T37.Z, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV T8.X, PV.W,
-; EG-NEXT: MOV T0.Y, T9.X,
-; EG-NEXT: BFE_UINT * T1.W, T37.Z, literal.x, T0.W,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
-; EG-NEXT: MOV * T9.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T1.W, T37.Z, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T37.Y, PV.W, PS,
-; EG-NEXT: MOV T9.X, PV.Y,
-; EG-NEXT: MOV * T0.Y, T4.X,
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T2.W, T37.W, literal.y,
-; EG-NEXT: -65536(nan), 255(3.573311e-43)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV * T4.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHL * T1.W, T37.W, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV T4.X, PV.W,
-; EG-NEXT: MOV T0.Y, T5.X,
-; EG-NEXT: BFE_UINT * T1.W, T37.W, literal.x, T0.W,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: ALU clause starting at 120:
-; EG-NEXT: AND_INT * T2.W, T0.Y, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
-; EG-NEXT: MOV * T5.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T1.W, T37.W, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T37.W, PV.W, PS,
-; EG-NEXT: MOV T5.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T32.X,
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T2.W, T35.X, literal.y,
-; EG-NEXT: -65536(nan), 255(3.573311e-43)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV * T32.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHL * T1.W, T35.X, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV T32.X, PV.W,
-; EG-NEXT: MOV T0.Y, T33.X,
-; EG-NEXT: BFE_UINT * T1.W, T35.X, literal.x, T0.W, BS:VEC_120/SCL_212
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
-; EG-NEXT: MOV * T33.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T1.W, T35.X, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T38.Y, PV.W, PS,
-; EG-NEXT: MOV T33.X, PV.Y,
-; EG-NEXT: MOV * T0.Y, T28.X,
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T2.W, T35.Y, literal.y,
-; EG-NEXT: -65536(nan), 255(3.573311e-43)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV * T28.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHL * T1.W, T35.Y, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV T28.X, PV.W,
-; EG-NEXT: MOV T0.Y, T29.X,
-; EG-NEXT: BFE_UINT * T1.W, T35.Y, literal.x, T0.W,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
-; EG-NEXT: MOV * T29.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T1.W, T35.Y, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T38.W, PV.W, PS,
-; EG-NEXT: MOV T29.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T24.X,
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T2.W, T35.Z, literal.y,
-; EG-NEXT: -65536(nan), 255(3.573311e-43)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV * T24.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHL * T1.W, T35.Z, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV T24.X, PV.W,
-; EG-NEXT: MOV T0.Y, T25.X,
-; EG-NEXT: BFE_UINT * T1.W, T35.Z, literal.x, T0.W,
+; EG-NEXT: BFE_UINT T13.Y, T11.X, literal.x, T0.W,
+; EG-NEXT: AND_INT T13.Z, T11.Y, literal.y,
+; EG-NEXT: BFE_UINT * T14.W, T11.W, literal.x, T0.W,
+; EG-NEXT: 8(1.121039e-44), 255(3.573311e-43)
+; EG-NEXT: AND_INT T13.X, T11.X, literal.x,
+; EG-NEXT: BFE_UINT T14.Y, T11.Z, literal.y, T0.W,
+; EG-NEXT: LSHR * T11.X, KC0[2].Y, literal.z,
+; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: AND_INT T14.Z, T11.W, literal.x,
+; EG-NEXT: BFE_UINT * T15.W, T12.Y, literal.y, T0.W,
+; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
+; EG-NEXT: AND_INT T14.X, T11.Z, literal.x,
+; EG-NEXT: BFE_UINT T15.Y, T12.X, literal.y, T0.W,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
+; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
-; EG-NEXT: MOV * T25.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T1.W, T35.Z, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T35.Y, PV.W, PS,
-; EG-NEXT: MOV T25.X, PV.Y,
-; EG-NEXT: MOV * T0.Y, T20.X,
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T2.W, T35.W, literal.y,
-; EG-NEXT: -65536(nan), 255(3.573311e-43)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV * T20.X, PV.W,
-; EG-NEXT: ALU clause starting at 225:
-; EG-NEXT: MOV T0.Y, T20.X,
-; EG-NEXT: LSHL * T1.W, T35.W, literal.x,
+; EG-NEXT: LSHR T16.X, PV.W, literal.x,
+; EG-NEXT: AND_INT T15.Z, T12.Y, literal.y,
+; EG-NEXT: BFE_UINT T17.W, T12.W, literal.z, T0.W,
+; EG-NEXT: AND_INT * T15.X, T12.X, literal.y,
+; EG-NEXT: 2(2.802597e-45), 255(3.573311e-43)
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
-; EG-NEXT: OR_INT * T1.W, PV.W, PS,
-; EG-NEXT: MOV T20.X, PV.W,
-; EG-NEXT: MOV T0.Y, T21.X,
-; EG-NEXT: BFE_UINT * T0.W, T35.W, literal.x, T0.W,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, PV.W, T0.W,
-; EG-NEXT: MOV * T21.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: BFE_UINT T17.Y, T12.Z, literal.x, T0.W,
+; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 32(4.484155e-44)
+; EG-NEXT: LSHR T12.X, PV.W, literal.x,
+; EG-NEXT: AND_INT T17.Z, T12.W, literal.y,
+; EG-NEXT: AND_INT * T17.X, T12.Z, literal.y,
+; EG-NEXT: 2(2.802597e-45), 255(3.573311e-43)
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T39.X, PV.W, literal.x,
-; EG-NEXT: LSHR * T40.X, KC0[2].Y, literal.x,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: LSHR T0.W, T35.W, literal.x,
-; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 48(6.726233e-44)
-; EG-NEXT: LSHR T41.X, PS, literal.x,
-; EG-NEXT: AND_INT T0.Z, T0.Y, literal.y,
-; EG-NEXT: AND_INT T0.W, PV.W, literal.z,
-; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w,
-; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT: 16711680(2.341805e-38), 32(4.484155e-44)
-; EG-NEXT: LSHR T42.X, PS, literal.x,
-; EG-NEXT: OR_INT * T35.W, PV.Z, PV.W,
+; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
+; EG-NEXT: LSHR * T18.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: MOV T21.X, PV.W,
-; EG-NEXT: MOV * T36.X, T16.X,
-; EG-NEXT: MOV * T36.Z, T12.X,
-; EG-NEXT: MOV T37.X, T8.X,
-; EG-NEXT: MOV T37.Z, T4.X, BS:VEC_120/SCL_212
-; EG-NEXT: MOV * T38.X, T32.X,
-; EG-NEXT: MOV * T38.Z, T28.X,
-; EG-NEXT: MOV T35.X, T24.X,
-; EG-NEXT: MOV * T35.Z, T20.X, BS:VEC_120/SCL_212
;
; GFX12-LABEL: constant_zextload_v32i8_to_v32i16:
; GFX12: ; %bb.0:
@@ -11919,331 +11280,60 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(ptr addrspace(1) %o
;
; EG-LABEL: constant_sextload_v32i8_to_v32i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 1, @14, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 1 @10
-; EG-NEXT: ALU 104, @16, KC0[], KC1[]
-; EG-NEXT: ALU 104, @121, KC0[], KC1[]
-; EG-NEXT: ALU 95, @226, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T42.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 1
+; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @8
+; EG-NEXT: ALU 39, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 10:
-; EG-NEXT: VTX_READ_128 T37.XYZW, T35.X, 16, #1
-; EG-NEXT: VTX_READ_128 T35.XYZW, T35.X, 0, #1
-; EG-NEXT: ALU clause starting at 14:
-; EG-NEXT: MOV * T0.Y, T16.X,
-; EG-NEXT: MOV * T35.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 16:
-; EG-NEXT: BFE_INT * T0.W, T37.X, 0.0, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
-; EG-NEXT: 65535(9.183409e-41), -65536(nan)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV * T16.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T0.W, T37.X, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV T16.X, PV.W,
-; EG-NEXT: MOV T0.Y, T17.X,
-; EG-NEXT: LSHR * T0.W, T37.X, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), -65536(nan)
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T17.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: ASHR * T0.W, T37.X, literal.x,
-; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: OR_INT * T36.Y, PV.W, PS,
-; EG-NEXT: MOV T17.X, PV.Y,
-; EG-NEXT: MOV T0.Y, T12.X,
-; EG-NEXT: BFE_INT * T0.W, T37.Y, 0.0, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV * T12.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T0.W, T37.Y, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV T12.X, PV.W,
-; EG-NEXT: MOV T0.Y, T13.X,
-; EG-NEXT: LSHR * T0.W, T37.Y, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), -65536(nan)
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T13.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: ASHR * T0.W, T37.Y, literal.x,
-; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: OR_INT * T36.W, PV.W, PS,
-; EG-NEXT: MOV T13.X, PV.W,
-; EG-NEXT: MOV T0.Y, T8.X,
-; EG-NEXT: BFE_INT * T0.W, T37.Z, 0.0, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV * T8.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T0.W, T37.Z, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV T8.X, PV.W,
-; EG-NEXT: MOV T0.Y, T9.X,
-; EG-NEXT: LSHR * T0.W, T37.Z, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), -65536(nan)
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T9.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: ASHR * T0.W, T37.Z, literal.x,
-; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: ALU clause starting at 121:
-; EG-NEXT: OR_INT * T37.Y, T1.W, T0.W,
-; EG-NEXT: MOV T9.X, PV.Y,
-; EG-NEXT: MOV T0.Y, T4.X,
-; EG-NEXT: BFE_INT * T0.W, T37.W, 0.0, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV * T4.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T0.W, T37.W, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV T4.X, PV.W,
-; EG-NEXT: MOV T0.Y, T5.X,
-; EG-NEXT: LSHR * T0.W, T37.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), -65536(nan)
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T5.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: ASHR * T0.W, T37.W, literal.x,
-; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: OR_INT * T37.W, PV.W, PS,
-; EG-NEXT: MOV T5.X, PV.W,
-; EG-NEXT: MOV T0.Y, T32.X,
-; EG-NEXT: BFE_INT * T0.W, T35.X, 0.0, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV * T32.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T0.W, T35.X, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV T32.X, PV.W,
-; EG-NEXT: MOV T0.Y, T33.X,
-; EG-NEXT: LSHR * T0.W, T35.X, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), -65536(nan)
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T33.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: ASHR * T0.W, T35.X, literal.x,
-; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: OR_INT * T38.Y, PV.W, PS,
-; EG-NEXT: MOV T33.X, PV.Y,
-; EG-NEXT: MOV T0.Y, T28.X,
-; EG-NEXT: BFE_INT * T0.W, T35.Y, 0.0, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV * T28.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T0.W, T35.Y, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV T28.X, PV.W,
-; EG-NEXT: MOV T0.Y, T29.X,
-; EG-NEXT: LSHR * T0.W, T35.Y, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), -65536(nan)
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T29.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: ASHR * T0.W, T35.Y, literal.x,
-; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
-; EG-NEXT: ALU clause starting at 226:
-; EG-NEXT: AND_INT T1.W, T0.Y, literal.x,
-; EG-NEXT: LSHL * T0.W, T0.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: OR_INT * T38.W, PV.W, PS,
-; EG-NEXT: MOV T29.X, PV.W,
-; EG-NEXT: MOV T0.Y, T24.X,
-; EG-NEXT: BFE_INT * T0.W, T35.Z, 0.0, literal.x,
-; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV * T24.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T0.W, T35.Z, literal.x,
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
+; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: MOV * T11.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 13:
+; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x,
+; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT: LSHR T14.X, PV.W, literal.x,
+; EG-NEXT: BFE_INT * T15.Z, T11.Y, 0.0, literal.y,
+; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT: BFE_INT T15.X, T11.X, 0.0, literal.x,
+; EG-NEXT: LSHR T0.Y, T12.W, literal.x,
+; EG-NEXT: BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: LSHR T0.W, T12.Y, literal.x,
+; EG-NEXT: LSHR * T1.W, T11.Y, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV T24.X, PV.W,
-; EG-NEXT: MOV T0.Y, T25.X,
-; EG-NEXT: LSHR * T0.W, T35.Z, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), -65536(nan)
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T25.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: ASHR * T0.W, T35.Z, literal.x,
-; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: OR_INT * T35.Y, PV.W, PS,
-; EG-NEXT: MOV T25.X, PV.Y,
-; EG-NEXT: MOV T0.Y, T20.X,
-; EG-NEXT: BFE_INT * T0.W, T35.W, 0.0, literal.x,
+; EG-NEXT: BFE_INT T16.X, T11.Z, 0.0, literal.x,
+; EG-NEXT: LSHR T1.Y, T11.W, literal.x,
+; EG-NEXT: BFE_INT T17.Z, T12.Y, 0.0, literal.x,
+; EG-NEXT: BFE_INT T15.W, PS, 0.0, literal.x,
+; EG-NEXT: LSHR * T1.W, T11.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
-; EG-NEXT: -65536(nan), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV * T20.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T0.W, T35.W, literal.x,
+; EG-NEXT: BFE_INT T17.X, T12.X, 0.0, literal.x,
+; EG-NEXT: BFE_INT T15.Y, PS, 0.0, literal.x,
+; EG-NEXT: BFE_INT T18.Z, T12.W, 0.0, literal.x,
+; EG-NEXT: BFE_INT T16.W, PV.Y, 0.0, literal.x,
+; EG-NEXT: LSHR * T1.W, T11.Z, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV T20.X, PV.W,
-; EG-NEXT: MOV T0.Y, T21.X,
-; EG-NEXT: LSHR * T0.W, T35.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), -65536(nan)
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T21.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T39.X, PV.W, literal.x,
-; EG-NEXT: LSHR * T40.X, KC0[2].Y, literal.x,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: ASHR T0.W, T35.W, literal.x,
-; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT: 24(3.363116e-44), 48(6.726233e-44)
-; EG-NEXT: LSHR T41.X, PS, literal.x,
-; EG-NEXT: AND_INT T0.Z, T0.Y, literal.y,
-; EG-NEXT: LSHL T0.W, PV.W, literal.z,
-; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w,
-; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT: 16(2.242078e-44), 32(4.484155e-44)
-; EG-NEXT: LSHR T42.X, PS, literal.x,
-; EG-NEXT: OR_INT * T35.W, PV.Z, PV.W,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: MOV T21.X, PV.W,
-; EG-NEXT: MOV * T36.X, T16.X,
-; EG-NEXT: MOV * T36.Z, T12.X,
-; EG-NEXT: MOV T37.X, T8.X,
-; EG-NEXT: MOV T37.Z, T4.X, BS:VEC_120/SCL_212
-; EG-NEXT: MOV * T38.X, T32.X,
-; EG-NEXT: MOV * T38.Z, T28.X,
-; EG-NEXT: MOV T35.X, T24.X,
-; EG-NEXT: MOV * T35.Z, T20.X, BS:VEC_120/SCL_212
+; EG-NEXT: BFE_INT T18.X, T12.Z, 0.0, literal.x,
+; EG-NEXT: BFE_INT T16.Y, PS, 0.0, literal.x,
+; EG-NEXT: LSHR T0.Z, T12.X, literal.x,
+; EG-NEXT: BFE_INT T17.W, T0.W, 0.0, literal.x,
+; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 32(4.484155e-44)
+; EG-NEXT: LSHR T11.X, PS, literal.x,
+; EG-NEXT: BFE_INT T17.Y, PV.Z, 0.0, literal.y,
+; EG-NEXT: LSHR T0.Z, T12.Z, literal.y,
+; EG-NEXT: BFE_INT T18.W, T0.Y, 0.0, literal.y,
+; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
+; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
+; EG-NEXT: LSHR T12.X, PS, literal.x,
+; EG-NEXT: BFE_INT * T18.Y, PV.Z, 0.0, literal.y,
+; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
;
; GFX12-LABEL: constant_sextload_v32i8_to_v32i16:
; GFX12: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
index 8589158f11a7088..573338231bd57c3 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
@@ -254,74 +254,63 @@ define amdgpu_kernel void @global_load_v3i16(ptr addrspace(1) %out, ptr addrspac
;
; EG-LABEL: global_load_v3i16:
; EG: ; %bb.0: ; %entry
-; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 2 @6
-; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
-; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X
+; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @6
+; EG-NEXT: ALU 14, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T3.X, 0
+; EG-NEXT: MEM_RAT MSKOR T2.XW, T0.X
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1
-; EG-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1
-; EG-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1
-; EG-NEXT: ALU clause starting at 12:
-; EG-NEXT: MOV * T5.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 13:
+; EG-NEXT: VTX_READ_16 T1.X, T0.X, 0, #1
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 4, #1
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 11:
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T2.W, T5.X, literal.y,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
-; EG-NEXT: LSHL T5.X, T2.W, PV.W,
-; EG-NEXT: LSHL * T5.W, literal.x, PV.W,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MOV T5.Y, 0.0,
-; EG-NEXT: MOV * T5.Z, 0.0,
-; EG-NEXT: LSHR T8.X, T0.W, literal.x,
-; EG-NEXT: LSHL T0.W, T7.X, literal.y,
-; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
-; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT: LSHL T2.X, T2.W, PV.W,
+; EG-NEXT: LSHL * T2.W, literal.x, PV.W,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT T6.X, PV.W, PS,
-; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
+; EG-NEXT: MOV T2.Y, 0.0,
+; EG-NEXT: MOV * T2.Z, 0.0,
+; EG-NEXT: LSHR T0.X, T0.W, literal.x,
+; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: global_load_v3i16:
; CM: ; %bb.0: ; %entry
-; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
-; CM-NEXT: TEX 2 @6
-; CM-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT MSKOR T5.XW, T8.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6.X, T7.X
+; CM-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 1 @6
+; CM-NEXT: ALU 15, @11, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT MSKOR T2.XW, T3.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT: CF_END
; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1
-; CM-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1
-; CM-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1
-; CM-NEXT: ALU clause starting at 12:
-; CM-NEXT: MOV * T5.X, KC0[2].Z,
-; CM-NEXT: ALU clause starting at 13:
+; CM-NEXT: VTX_READ_16 T1.X, T0.X, 0, #1
+; CM-NEXT: VTX_READ_16 T0.X, T0.X, 4, #1
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: MOV * T0.X, KC0[2].Z,
+; CM-NEXT: ALU clause starting at 11:
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; CM-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; CM-NEXT: AND_INT * T1.W, PV.W, literal.x,
; CM-NEXT: 3(4.203895e-45), 0(0.000000e+00)
-; CM-NEXT: AND_INT T0.Z, T5.X, literal.x,
+; CM-NEXT: AND_INT T0.Z, T0.X, literal.x,
; CM-NEXT: LSHL * T1.W, PV.W, literal.y,
; CM-NEXT: 65535(9.183409e-41), 3(4.203895e-45)
-; CM-NEXT: LSHL T5.X, PV.Z, PV.W,
-; CM-NEXT: LSHL * T5.W, literal.x, PV.W,
+; CM-NEXT: LSHL T2.X, PV.Z, PV.W,
+; CM-NEXT: LSHL * T2.W, literal.x, PV.W,
; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; CM-NEXT: MOV T5.Y, 0.0,
-; CM-NEXT: MOV * T5.Z, 0.0,
-; CM-NEXT: LSHL T0.Z, T7.X, literal.x,
-; CM-NEXT: AND_INT * T1.W, T6.X, literal.y, BS:VEC_120/SCL_212
-; CM-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
-; CM-NEXT: OR_INT * T6.X, PV.Z, PV.W,
-; CM-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
+; CM-NEXT: MOV T2.Y, 0.0,
+; CM-NEXT: MOV * T2.Z, 0.0,
+; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: LSHR * T8.X, T0.W, literal.x,
+; CM-NEXT: LSHR * T3.X, T0.W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
%ld = load <3 x i16>, ptr addrspace(1) %in
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i8.ll b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
index fb34b5e1f3af6e5..896e60900c745fd 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
@@ -916,38 +916,22 @@ define amdgpu_kernel void @global_zextload_v32i8_to_v32i16(ptr addrspace(1) %out
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
+; EG: BFE_{{U?}}INT
+; EG: BFE_{{U?}}INT
+; EG: BFE_{{U?}}INT
+; EG: BFE_{{U?}}INT
+; EG: BFE_{{U?}}INT
+; EG: BFE_{{U?}}INT
+; EG: BFE_{{U?}}INT
+; EG: BFE_{{U?}}INT
+; EG: BFE_{{U?}}INT
+; EG: BFE_{{U?}}INT
+; EG: BFE_{{U?}}INT
+; EG: BFE_{{U?}}INT
+; EG: BFE_{{U?}}INT
+; EG: BFE_{{U?}}INT
+; EG: BFE_{{U?}}INT
+; EG: BFE_{{U?}}INT
define amdgpu_kernel void @global_sextload_v32i8_to_v32i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%load = load <32 x i8>, ptr addrspace(1) %in
%ext = sext <32 x i8> %load to <32 x i16>
diff --git a/llvm/test/CodeGen/AMDGPU/min.ll b/llvm/test/CodeGen/AMDGPU/min.ll
index af7f92798a9319a..a6db7d331cef39d 100644
--- a/llvm/test/CodeGen/AMDGPU/min.ll
+++ b/llvm/test/CodeGen/AMDGPU/min.ll
@@ -828,81 +828,30 @@ define amdgpu_kernel void @s_test_imin_sle_v2i16(ptr addrspace(1) %out, <2 x i16
define amdgpu_kernel void @s_test_imin_sle_v4i16(ptr addrspace(1) %out, <4 x i16> %a, <4 x i16> %b) #0 {
; EG-LABEL: s_test_imin_sle_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 1, @28, KC0[], KC1[]
-; EG-NEXT: TEX 1 @12
-; EG-NEXT: ALU 9, @30, KC0[], KC1[]
-; EG-NEXT: TEX 1 @16
-; EG-NEXT: ALU 10, @40, KC0[], KC1[]
-; EG-NEXT: TEX 1 @20
-; EG-NEXT: ALU 10, @51, KC0[], KC1[]
-; EG-NEXT: TEX 1 @24
-; EG-NEXT: ALU 11, @62, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XY, T5.X, 1
+; EG-NEXT: ALU 0, @14, KC0[], KC1[]
+; EG-NEXT: TEX 3 @6
+; EG-NEXT: ALU 9, @15, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
-; EG-NEXT: Fetch clause starting at 12:
-; EG-NEXT: VTX_READ_16 T6.X, T5.X, 50, #3
-; EG-NEXT: VTX_READ_16 T7.X, T5.X, 58, #3
-; EG-NEXT: Fetch clause starting at 16:
-; EG-NEXT: VTX_READ_16 T6.X, T5.X, 48, #3
-; EG-NEXT: VTX_READ_16 T7.X, T5.X, 56, #3
-; EG-NEXT: Fetch clause starting at 20:
-; EG-NEXT: VTX_READ_16 T6.X, T5.X, 46, #3
-; EG-NEXT: VTX_READ_16 T7.X, T5.X, 54, #3
-; EG-NEXT: Fetch clause starting at 24:
-; EG-NEXT: VTX_READ_16 T6.X, T5.X, 44, #3
-; EG-NEXT: VTX_READ_16 T5.X, T5.X, 52, #3
-; EG-NEXT: ALU clause starting at 28:
-; EG-NEXT: MOV * T0.Y, T3.X,
-; EG-NEXT: MOV * T5.X, 0.0,
-; EG-NEXT: ALU clause starting at 30:
-; EG-NEXT: BFE_INT T0.Z, T6.X, 0.0, literal.x,
-; EG-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: MIN_INT * T0.W, PV.Z, PV.W,
-; EG-NEXT: LSHL T0.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
-; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV * T3.X, PV.W,
-; EG-NEXT: MOV * T0.Y, PV.X,
-; EG-NEXT: ALU clause starting at 40:
-; EG-NEXT: BFE_INT T0.Z, T6.X, 0.0, literal.x,
-; EG-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: MIN_INT T0.W, PV.Z, PV.W,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV T3.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T2.X,
-; EG-NEXT: ALU clause starting at 51:
-; EG-NEXT: BFE_INT T0.Z, T6.X, 0.0, literal.x,
-; EG-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: MIN_INT T0.W, PV.Z, PV.W,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_16 T1.X, T0.X, 46, #3
+; EG-NEXT: VTX_READ_16 T2.X, T0.X, 52, #3
+; EG-NEXT: VTX_READ_16 T3.X, T0.X, 44, #3
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 54, #3
+; EG-NEXT: ALU clause starting at 14:
+; EG-NEXT: MOV * T0.X, 0.0,
+; EG-NEXT: ALU clause starting at 15:
+; EG-NEXT: BFE_INT T0.Z, T1.X, 0.0, literal.x,
+; EG-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T2.X, PV.W,
-; EG-NEXT: MOV * T0.Y, PV.X,
-; EG-NEXT: ALU clause starting at 62:
-; EG-NEXT: BFE_INT T0.Z, T6.X, 0.0, literal.x,
-; EG-NEXT: BFE_INT * T0.W, T5.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: MIN_INT T0.Y, PV.Z, PV.W,
+; EG-NEXT: BFE_INT T0.Z, T3.X, 0.0, literal.x,
+; EG-NEXT: BFE_INT * T0.W, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: MIN_INT * T0.W, PV.Z, PV.W,
-; EG-NEXT: LSHR T5.X, KC0[2].Y, literal.x,
-; EG-NEXT: AND_INT T1.W, T0.Y, literal.y,
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.z,
-; EG-NEXT: 2(2.802597e-45), -65536(nan)
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T6.X, PV.W, PS,
-; EG-NEXT: MOV T2.X, PV.X,
-; EG-NEXT: MOV * T6.Y, T3.X,
+; EG-NEXT: MIN_INT T0.X, PV.Z, PV.W,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CI-LABEL: s_test_imin_sle_v4i16:
; CI: ; %bb.0:
@@ -1848,49 +1797,40 @@ define amdgpu_kernel void @v_test_umin_ule_v3i32(ptr addrspace(1) %out, ptr addr
define amdgpu_kernel void @v_test_umin_ule_v3i16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
; EG-LABEL: v_test_umin_ule_v3i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 3, @20, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 1 @8
-; EG-NEXT: ALU 11, @24, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 3 @12
-; EG-NEXT: ALU 8, @36, KC0[], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T8.X, 0
-; EG-NEXT: MEM_RAT MSKOR T7.XW, T0.X
+; EG-NEXT: ALU 3, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 3 @6
+; EG-NEXT: ALU 17, @18, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T2.X, 0
+; EG-NEXT: MEM_RAT MSKOR T4.XW, T0.X
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 8:
-; EG-NEXT: VTX_READ_16 T7.X, T6.X, 4, #1
-; EG-NEXT: VTX_READ_16 T8.X, T0.X, 4, #1
-; EG-NEXT: Fetch clause starting at 12:
-; EG-NEXT: VTX_READ_16 T8.X, T6.X, 0, #1
-; EG-NEXT: VTX_READ_16 T9.X, T0.X, 0, #1
-; EG-NEXT: VTX_READ_16 T6.X, T6.X, 2, #1
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 2, #1
-; EG-NEXT: ALU clause starting at 20:
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_16 T2.X, T1.X, 0, #1
+; EG-NEXT: VTX_READ_16 T3.X, T0.X, 0, #1
+; EG-NEXT: VTX_READ_16 T1.X, T1.X, 4, #1
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 4, #1
+; EG-NEXT: ALU clause starting at 14:
; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: ADD_INT T0.X, KC0[2].Z, PV.W,
-; EG-NEXT: ADD_INT * T6.X, KC0[2].W, PV.W,
-; EG-NEXT: ALU clause starting at 24:
+; EG-NEXT: ADD_INT * T1.X, KC0[2].W, PV.W,
+; EG-NEXT: ALU clause starting at 18:
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W,
; EG-NEXT: ADD_INT * T1.W, PV.W, literal.x,
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT * T2.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T2.W, PV.W, literal.x,
-; EG-NEXT: MIN_UINT * T3.W, T8.X, T7.X,
+; EG-NEXT: MIN_UINT * T3.W, T0.X, T1.X,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
-; EG-NEXT: LSHL T7.X, PS, PV.W,
-; EG-NEXT: LSHL * T7.W, literal.x, PV.W,
+; EG-NEXT: LSHL T4.X, PS, PV.W,
+; EG-NEXT: LSHL * T4.W, literal.x, PV.W,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MOV * T7.Y, 0.0,
-; EG-NEXT: ALU clause starting at 36:
-; EG-NEXT: MOV T7.Z, 0.0,
-; EG-NEXT: MIN_UINT * T2.W, T0.X, T6.X,
+; EG-NEXT: MOV T4.Y, 0.0,
+; EG-NEXT: MOV * T4.Z, 0.0,
; EG-NEXT: LSHR T0.X, T1.W, literal.x,
-; EG-NEXT: LSHL T1.W, PV.W, literal.y,
-; EG-NEXT: MIN_UINT * T2.W, T9.X, T8.X,
-; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
-; EG-NEXT: OR_INT T6.X, PV.W, PS,
-; EG-NEXT: LSHR * T8.X, T0.W, literal.x,
+; EG-NEXT: MIN_UINT * T1.X, T3.X, T2.X,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: LSHR * T2.X, T0.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CI-LABEL: v_test_umin_ule_v3i16:
@@ -2936,142 +2876,46 @@ define amdgpu_kernel void @s_test_umin_ult_v8i32(ptr addrspace(1) %out, <8 x i32
define amdgpu_kernel void @s_test_umin_ult_v8i16(ptr addrspace(1) %out, <8 x i16> %a, <8 x i16> %b) #0 {
; EG-LABEL: s_test_umin_ult_v8i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 1, @52, KC0[], KC1[]
-; EG-NEXT: TEX 1 @20
-; EG-NEXT: ALU 9, @54, KC0[], KC1[]
-; EG-NEXT: TEX 1 @24
-; EG-NEXT: ALU 8, @64, KC0[], KC1[]
-; EG-NEXT: TEX 1 @28
-; EG-NEXT: ALU 10, @73, KC0[], KC1[]
-; EG-NEXT: TEX 1 @32
-; EG-NEXT: ALU 8, @84, KC0[], KC1[]
-; EG-NEXT: TEX 1 @36
-; EG-NEXT: ALU 10, @93, KC0[], KC1[]
-; EG-NEXT: TEX 1 @40
-; EG-NEXT: ALU 8, @104, KC0[], KC1[]
-; EG-NEXT: TEX 1 @44
-; EG-NEXT: ALU 10, @113, KC0[], KC1[]
-; EG-NEXT: TEX 1 @48
-; EG-NEXT: ALU 10, @124, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 1
+; EG-NEXT: ALU 0, @24, KC0[], KC1[]
+; EG-NEXT: TEX 2 @8
+; EG-NEXT: ALU 2, @25, KC0[], KC1[]
+; EG-NEXT: TEX 4 @14
+; EG-NEXT: ALU 14, @28, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
-; EG-NEXT: Fetch clause starting at 20:
-; EG-NEXT: VTX_READ_16 T8.X, T7.X, 66, #3
-; EG-NEXT: VTX_READ_16 T9.X, T7.X, 82, #3
-; EG-NEXT: Fetch clause starting at 24:
-; EG-NEXT: VTX_READ_16 T8.X, T7.X, 64, #3
-; EG-NEXT: VTX_READ_16 T9.X, T7.X, 80, #3
-; EG-NEXT: Fetch clause starting at 28:
-; EG-NEXT: VTX_READ_16 T8.X, T7.X, 62, #3
-; EG-NEXT: VTX_READ_16 T9.X, T7.X, 78, #3
-; EG-NEXT: Fetch clause starting at 32:
-; EG-NEXT: VTX_READ_16 T8.X, T7.X, 60, #3
-; EG-NEXT: VTX_READ_16 T9.X, T7.X, 76, #3
-; EG-NEXT: Fetch clause starting at 36:
-; EG-NEXT: VTX_READ_16 T8.X, T7.X, 58, #3
-; EG-NEXT: VTX_READ_16 T9.X, T7.X, 74, #3
-; EG-NEXT: Fetch clause starting at 40:
-; EG-NEXT: VTX_READ_16 T8.X, T7.X, 56, #3
-; EG-NEXT: VTX_READ_16 T9.X, T7.X, 72, #3
-; EG-NEXT: Fetch clause starting at 44:
-; EG-NEXT: VTX_READ_16 T8.X, T7.X, 54, #3
-; EG-NEXT: VTX_READ_16 T9.X, T7.X, 70, #3
-; EG-NEXT: Fetch clause starting at 48:
-; EG-NEXT: VTX_READ_16 T8.X, T7.X, 52, #3
-; EG-NEXT: VTX_READ_16 T7.X, T7.X, 68, #3
-; EG-NEXT: ALU clause starting at 52:
-; EG-NEXT: MOV * T0.Y, T3.X,
-; EG-NEXT: MOV * T7.X, 0.0,
-; EG-NEXT: ALU clause starting at 54:
-; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
-; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
-; EG-NEXT: LSHL T0.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
-; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV * T3.X, PV.W,
-; EG-NEXT: MOV * T0.Y, PV.X,
-; EG-NEXT: ALU clause starting at 64:
-; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
-; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, T0.Y, literal.x,
-; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T3.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T2.X,
-; EG-NEXT: ALU clause starting at 73:
-; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
-; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MIN_UINT T0.W, PV.W, PS,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T2.X, PV.W,
-; EG-NEXT: MOV * T0.Y, PV.X,
-; EG-NEXT: ALU clause starting at 84:
-; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
-; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, T0.Y, literal.x,
-; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T7.Z, PV.W, PS,
-; EG-NEXT: MOV T2.X, PV.Z,
-; EG-NEXT: MOV * T0.Y, T5.X,
-; EG-NEXT: ALU clause starting at 93:
-; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
-; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MIN_UINT T0.W, PV.W, PS,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T5.X, PV.W,
-; EG-NEXT: MOV * T0.Y, PV.X,
-; EG-NEXT: ALU clause starting at 104:
-; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
-; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_16 T1.X, T0.X, 62, #3
+; EG-NEXT: VTX_READ_16 T2.X, T0.X, 60, #3
+; EG-NEXT: VTX_READ_16 T3.X, T0.X, 78, #3
+; EG-NEXT: Fetch clause starting at 14:
+; EG-NEXT: VTX_READ_16 T1.X, T0.X, 68, #3
+; EG-NEXT: VTX_READ_16 T3.X, T0.X, 52, #3
+; EG-NEXT: VTX_READ_16 T4.X, T0.X, 70, #3
+; EG-NEXT: VTX_READ_16 T5.X, T0.X, 54, #3
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 76, #3
+; EG-NEXT: ALU clause starting at 24:
+; EG-NEXT: MOV * T0.X, 0.0,
+; EG-NEXT: ALU clause starting at 25:
+; EG-NEXT: AND_INT T0.W, T1.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T3.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: AND_INT T2.W, T0.Y, literal.x,
-; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, PV.W, PS,
-; EG-NEXT: MOV T5.X, PV.W,
-; EG-NEXT: MOV * T0.Y, T4.X,
-; EG-NEXT: ALU clause starting at 113:
-; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
-; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
+; EG-NEXT: ALU clause starting at 28:
+; EG-NEXT: AND_INT T0.Z, T2.X, literal.x,
+; EG-NEXT: AND_INT T2.W, T0.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: MIN_UINT * T0.W, T0.W, T1.W,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MIN_UINT T0.W, PV.W, PS,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT: MIN_UINT T0.Z, PV.Z, PV.W,
+; EG-NEXT: AND_INT T1.W, T5.X, literal.x,
+; EG-NEXT: AND_INT * T2.W, T4.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T4.X, PV.W,
-; EG-NEXT: MOV * T0.Y, PV.X,
-; EG-NEXT: ALU clause starting at 124:
-; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
-; EG-NEXT: AND_INT * T1.W, T7.X, literal.x,
+; EG-NEXT: MIN_UINT T0.Y, PV.W, PS,
+; EG-NEXT: AND_INT T1.W, T3.X, literal.x,
+; EG-NEXT: AND_INT * T2.W, T1.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
-; EG-NEXT: AND_INT T2.W, T0.Y, literal.y,
-; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
-; EG-NEXT: 2(2.802597e-45), -65536(nan)
-; EG-NEXT: OR_INT * T7.X, PV.W, PS,
-; EG-NEXT: MOV T4.X, PV.X,
-; EG-NEXT: MOV * T7.W, T3.X,
-; EG-NEXT: MOV * T7.Y, T5.X,
+; EG-NEXT: MIN_UINT T0.X, PV.W, PS,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CI-LABEL: s_test_umin_ult_v8i16:
; CI: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll
index 7e7f4f5d19914b8..c9efeeefdf2d89b 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.ll
@@ -681,63 +681,30 @@ define amdgpu_kernel void @shl_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in
;
; EG-LABEL: shl_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 3, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 42, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XY, T0.X, 1
+; EG-NEXT: ALU 10, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T8.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T10.XYZW, T0.X, 0, #1
+; EG-NEXT: VTX_READ_128 T8.XYZW, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV T0.Y, T6.X,
-; EG-NEXT: LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W,
-; EG-NEXT: ALU clause starting at 12:
-; EG-NEXT: AND_INT * T1.W, T10.Z, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T1.W, T10.X, PV.W,
-; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T2.W, T0.Y, literal.y,
-; EG-NEXT: 65535(9.183409e-41), -65536(nan)
-; EG-NEXT: OR_INT * T1.W, PS, PV.W,
-; EG-NEXT: MOV * T6.X, PV.W,
-; EG-NEXT: MOV T0.X, PV.X,
-; EG-NEXT: LSHR T1.W, T10.Z, literal.x,
-; EG-NEXT: LSHR * T2.W, T10.X, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHL T1.W, PS, PV.W,
-; EG-NEXT: AND_INT * T2.W, PV.X, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
+; EG-NEXT: ALU clause starting at 11:
+; EG-NEXT: LSHR T1.W, T8.Z, literal.x,
+; EG-NEXT: LSHR * T2.W, T8.X, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
-; EG-NEXT: MOV T6.X, PV.W,
-; EG-NEXT: MOV * T0.X, T7.X,
-; EG-NEXT: AND_INT * T1.W, T10.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL T1.W, T10.Y, PV.W,
-; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: AND_INT * T1.W, PV.W, literal.x,
+; EG-NEXT: LSHL T0.Y, PS, PV.W,
+; EG-NEXT: AND_INT T1.W, T8.Z, literal.x,
+; EG-NEXT: AND_INT * T2.W, T8.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
-; EG-NEXT: MOV * T7.X, PV.W,
-; EG-NEXT: MOV T0.X, PV.X,
-; EG-NEXT: LSHR T1.W, T10.W, literal.x,
-; EG-NEXT: LSHR * T2.W, T10.Y, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHL * T1.W, PS, PV.W,
-; EG-NEXT: AND_INT T0.Z, T0.X, literal.x,
-; EG-NEXT: LSHL T1.W, PV.W, literal.y,
+; EG-NEXT: LSHL T0.X, PS, PV.W,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W,
-; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT: LSHR T0.X, PS, literal.x,
-; EG-NEXT: OR_INT * T10.Y, PV.Z, PV.W,
+; EG-NEXT: LSHR * T8.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: MOV T7.X, PV.Y,
-; EG-NEXT: MOV * T10.X, T6.X,
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i32 %tid
diff --git a/llvm/test/CodeGen/AMDGPU/sra.ll b/llvm/test/CodeGen/AMDGPU/sra.ll
index ef1adbb395e76ef..386a046113964de 100644
--- a/llvm/test/CodeGen/AMDGPU/sra.ll
+++ b/llvm/test/CodeGen/AMDGPU/sra.ll
@@ -323,67 +323,28 @@ define amdgpu_kernel void @ashr_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %i
;
; EG-LABEL: ashr_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 48, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XY, T9.X, 1
+; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XY, T8.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T9.XYZW, T9.X, 0, #1
+; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T0.Y, T6.X,
-; EG-NEXT: MOV * T9.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 10:
-; EG-NEXT: BFE_INT T0.W, T9.X, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, T9.Z, literal.y,
-; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
-; EG-NEXT: ASHR * T0.W, PV.W, PS,
-; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
-; EG-NEXT: 65535(9.183409e-41), -65536(nan)
-; EG-NEXT: OR_INT * T0.W, PS, PV.W,
-; EG-NEXT: MOV * T6.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T0.W, T9.X, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: LSHR * T1.W, T9.Z, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: ASHR T0.W, PV.W, PS,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV T6.X, PV.W,
-; EG-NEXT: MOV T0.Y, T7.X,
-; EG-NEXT: BFE_INT T0.W, T9.Y, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, T9.W, literal.y,
+; EG-NEXT: MOV * T7.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: LSHR T0.Z, T7.X, literal.x,
+; EG-NEXT: BFE_INT T0.W, T7.X, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, T7.Z, literal.y,
; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
-; EG-NEXT: ASHR T0.W, PV.W, PS,
-; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
-; EG-NEXT: -65536(nan), 0(0.000000e+00)
-; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
-; EG-NEXT: MOV * T7.X, PV.W,
-; EG-NEXT: MOV T0.Y, PV.X,
-; EG-NEXT: LSHR * T0.W, T9.Y, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
-; EG-NEXT: LSHR * T1.W, T9.W, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: ASHR T0.W, PV.W, PS,
-; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
-; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: ASHR T7.X, PV.W, PS,
+; EG-NEXT: BFE_INT T0.W, PV.Z, 0.0, literal.x,
+; EG-NEXT: LSHR * T1.W, T7.Z, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T9.X, KC0[2].Y, literal.x,
-; EG-NEXT: OR_INT * T10.Y, T1.W, PV.W,
+; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
+; EG-NEXT: ASHR * T7.Y, PV.W, PS,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: MOV T7.X, PV.Y,
-; EG-NEXT: MOV * T10.X, T6.X,
%b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %in, i16 1
%a = load <4 x i16>, ptr addrspace(1) %in
%b = load <4 x i16>, ptr addrspace(1) %b_ptr
More information about the llvm-branch-commits
mailing list