[llvm] [DAG] Always use stack to promote bitcast when the source is vector (PR #151065)

Min-Yih Hsu via llvm-commits llvm-commits at lists.llvm.org
Sat Aug 2 15:31:31 PDT 2025


https://github.com/mshockwave updated https://github.com/llvm/llvm-project/pull/151065

>From f7291c2931e8df72f95d1ed97202afebfa4a8a30 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Fri, 1 Aug 2025 17:04:47 -0700
Subject: [PATCH 1/4] Pre-commit test

---
 llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll

diff --git a/llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll
new file mode 100644
index 0000000000000..4bdd00d94e1c8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll
@@ -0,0 +1,10 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mattr='+v' < %s | FileCheck %s
+
+define <2 x i8> @fp4(<4 x i4> %0) nounwind {
+; CHECK-LABEL: fp4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  %2 = bitcast <4 x i4> %0 to <2 x i8>
+  ret <2 x i8> %2
+}

>From f58d68a3ddfa4afe225a5f8643b688e049e16894 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Mon, 28 Jul 2025 17:46:24 -0700
Subject: [PATCH 2/4] [DAG] Always use stack to promote bitcast operand when
 the source is vector

---
 .../SelectionDAG/LegalizeIntegerTypes.cpp     |    3 +-
 llvm/test/CodeGen/AMDGPU/ctpop16.ll           |  330 ++-
 llvm/test/CodeGen/AMDGPU/kernel-args.ll       |  733 ++++-
 llvm/test/CodeGen/AMDGPU/load-constant-i16.ll |   40 +-
 llvm/test/CodeGen/AMDGPU/load-constant-i8.ll  | 1300 +++++++--
 llvm/test/CodeGen/AMDGPU/load-global-i16.ll   |   79 +-
 llvm/test/CodeGen/AMDGPU/load-global-i8.ll    | 2595 ++++++++++++++---
 llvm/test/CodeGen/AMDGPU/load-local-i16.ll    |   24 +-
 llvm/test/CodeGen/AMDGPU/min.ll               |  306 +-
 llvm/test/CodeGen/AMDGPU/shl.ll               |   59 +-
 llvm/test/CodeGen/AMDGPU/sra.ll               |   67 +-
 11 files changed, 4608 insertions(+), 928 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 583a85a332dcd..0bd0599692bcc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2218,7 +2218,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
   switch (getTypeAction(InVT)) {
   case TargetLowering::TypePromoteInteger: {
     // TODO: Handle big endian
-    if (OutVT.isVector() && DAG.getDataLayout().isLittleEndian()) {
+    if (OutVT.isVector() && !InVT.isVector() &&
+        DAG.getDataLayout().isLittleEndian()) {
       EVT EltVT = OutVT.getVectorElementType();
       TypeSize EltSize = EltVT.getSizeInBits();
       TypeSize NInSize = NInVT.getSizeInBits();
diff --git a/llvm/test/CodeGen/AMDGPU/ctpop16.ll b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
index 1b9b508137b7f..cefcbddd3e394 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop16.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
@@ -457,27 +457,58 @@ define amdgpu_kernel void @v_ctpop_v4i16(ptr addrspace(1) noalias %out, ptr addr
 ;
 ; EG-LABEL: v_ctpop_v4i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 2, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 3, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 7, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T6.X, 1
+; EG-NEXT:    ALU 37, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XY, T0.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
+; EG-NEXT:     VTX_READ_64 T8.XY, T0.X, 0, #1
 ; EG-NEXT:    ALU clause starting at 8:
-; EG-NEXT:     LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT:     MOV T0.Y, T4.X,
+; EG-NEXT:     LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
 ; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
 ; EG-NEXT:     ADD_INT * T0.X, KC0[2].Z, PV.W,
-; EG-NEXT:    ALU clause starting at 11:
-; EG-NEXT:     LSHR * T0.W, T0.X, literal.x,
+; EG-NEXT:    ALU clause starting at 12:
+; EG-NEXT:     AND_INT * T0.W, T8.X, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T0.W, PV.W,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV T0.X, PV.X,
+; EG-NEXT:     LSHR * T0.W, T8.X, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BCNT_INT T0.Y, PV.W,
-; EG-NEXT:     AND_INT * T0.W, T0.X, literal.x,
+; EG-NEXT:     BCNT_INT T0.W, PV.W,
+; EG-NEXT:     AND_INT * T1.W, PV.X, literal.x,
 ; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT:     BCNT_INT T0.X, PV.W,
-; EG-NEXT:     LSHR * T6.X, KC0[2].Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV * T0.X, T5.X,
+; EG-NEXT:     AND_INT * T0.W, T8.Y, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T0.W, PV.W,
+; EG-NEXT:     AND_INT * T1.W, T0.X, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV T0.X, PV.X,
+; EG-NEXT:     LSHR * T0.W, T8.Y, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T0.W, PV.W,
+; EG-NEXT:     AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
+; EG-NEXT:     OR_INT * T8.Y, T1.W, PV.W,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T5.X, PV.Y,
+; EG-NEXT:     MOV * T8.X, T4.X,
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <4 x i16>, ptr addrspace(1) %in, i32 %tid
   %val = load <4 x i16>, ptr addrspace(1) %in.gep, align 16
@@ -570,33 +601,94 @@ define amdgpu_kernel void @v_ctpop_v8i16(ptr addrspace(1) noalias %out, ptr addr
 ;
 ; EG-LABEL: v_ctpop_v8i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 2, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 3, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 13, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T8.X, 1
+; EG-NEXT:    ALU 73, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T12.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
+; EG-NEXT:     VTX_READ_128 T12.XYZW, T0.X, 0, #1
 ; EG-NEXT:    ALU clause starting at 8:
-; EG-NEXT:     LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT:     MOV T0.Y, T4.X,
+; EG-NEXT:     LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
 ; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
 ; EG-NEXT:     ADD_INT * T0.X, KC0[2].Z, PV.W,
-; EG-NEXT:    ALU clause starting at 11:
-; EG-NEXT:     LSHR * T0.W, T0.Z, literal.x,
+; EG-NEXT:    ALU clause starting at 12:
+; EG-NEXT:     LSHR * T0.W, T12.X, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT * T0.W, PV.W,
+; EG-NEXT:     LSHL T0.W, PV.W, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV T0.X, PV.X,
+; EG-NEXT:     AND_INT * T0.W, T12.X, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T0.W, PV.W,
+; EG-NEXT:     AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV * T0.X, T5.X,
+; EG-NEXT:     LSHR * T0.W, T12.Y, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
 ; EG-NEXT:     BCNT_INT T0.W, PV.W,
-; EG-NEXT:     AND_INT * T1.W, T0.Z, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.X, literal.x,
 ; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT:     BCNT_INT T0.Z, PS,
-; EG-NEXT:     LSHR * T1.W, T0.X, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV T0.X, PV.X,
+; EG-NEXT:     AND_INT * T0.W, T12.Y, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T0.W, PV.W,
+; EG-NEXT:     AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.Y, PS, PV.W,
+; EG-NEXT:     MOV T5.X, PV.Y,
+; EG-NEXT:     MOV * T0.X, T8.X,
+; EG-NEXT:     LSHR * T0.W, T12.Z, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BCNT_INT T0.Y, PV.W,
+; EG-NEXT:     BCNT_INT T0.W, PV.W,
 ; EG-NEXT:     AND_INT * T1.W, T0.X, literal.x,
 ; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT:     BCNT_INT T0.X, PV.W,
-; EG-NEXT:     LSHR * T8.X, KC0[2].Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T8.X, PV.W,
+; EG-NEXT:     MOV T0.X, PV.X,
+; EG-NEXT:     AND_INT * T0.W, T12.Z, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T0.W, PV.W,
+; EG-NEXT:     AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV T8.X, PV.W,
+; EG-NEXT:     MOV * T0.X, T9.X,
+; EG-NEXT:     LSHR * T0.W, T12.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T0.W, PV.W,
+; EG-NEXT:     AND_INT * T1.W, T0.X, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T9.X, PV.W,
+; EG-NEXT:     MOV T0.X, PV.X,
+; EG-NEXT:     AND_INT * T0.W, T12.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T0.W, PV.W,
+; EG-NEXT:     AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     LSHR T12.X, KC0[2].Y, literal.x,
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T9.X, PV.W,
+; EG-NEXT:     MOV * T0.X, T4.X,
+; EG-NEXT:     MOV * T0.Z, T8.X,
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <8 x i16>, ptr addrspace(1) %in, i32 %tid
   %val = load <8 x i16>, ptr addrspace(1) %in.gep, align 32
@@ -745,46 +837,174 @@ define amdgpu_kernel void @v_ctpop_v16i16(ptr addrspace(1) noalias %out, ptr add
 ;
 ; EG-LABEL: v_ctpop_v16i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 2, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 1 @6
-; EG-NEXT:    ALU 25, @13, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T14.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T13.X, 1
+; EG-NEXT:    ALU 3, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 1 @8
+; EG-NEXT:    ALU 114, @16, KC0[], KC1[]
+; EG-NEXT:    ALU 34, @131, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T22.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T21.X, 1
 ; EG-NEXT:    CF_END
-; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_128 T12.XYZW, T0.X, 16, #1
-; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
-; EG-NEXT:    ALU clause starting at 10:
-; EG-NEXT:     LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 8:
+; EG-NEXT:     VTX_READ_128 T20.XYZW, T0.X, 16, #1
+; EG-NEXT:     VTX_READ_128 T21.XYZW, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 12:
+; EG-NEXT:     MOV T0.Y, T4.X,
+; EG-NEXT:     LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
 ; EG-NEXT:    5(7.006492e-45), 0(0.000000e+00)
 ; EG-NEXT:     ADD_INT * T0.X, KC0[2].Z, PV.W,
-; EG-NEXT:    ALU clause starting at 13:
-; EG-NEXT:     LSHR * T0.W, T12.Z, literal.x,
+; EG-NEXT:    ALU clause starting at 16:
+; EG-NEXT:     LSHR * T0.W, T20.X, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BCNT_INT T12.W, PV.W,
-; EG-NEXT:     AND_INT * T0.W, T12.Z, literal.x,
+; EG-NEXT:     BCNT_INT * T0.W, PV.W,
+; EG-NEXT:     LSHL T0.W, PV.W, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV T0.X, PV.X,
+; EG-NEXT:     AND_INT * T0.W, T20.X, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T0.W, PV.W,
+; EG-NEXT:     AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV * T0.X, T5.X,
+; EG-NEXT:     LSHR * T0.W, T20.Y, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T0.W, PV.W,
+; EG-NEXT:     AND_INT * T1.W, T0.X, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV T0.X, PV.X,
+; EG-NEXT:     AND_INT * T0.W, T20.Y, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T0.W, PV.W,
+; EG-NEXT:     AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.Y, PS, PV.W,
+; EG-NEXT:     MOV T5.X, PV.Y,
+; EG-NEXT:     MOV * T0.X, T8.X,
+; EG-NEXT:     LSHR * T0.W, T20.Z, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T0.W, PV.W,
+; EG-NEXT:     AND_INT * T1.W, T0.X, literal.x,
 ; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT:     BCNT_INT T12.Z, PS,
-; EG-NEXT:     LSHR T0.W, T0.Z, literal.x,
-; EG-NEXT:     LSHR * T1.W, T12.X, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T8.X, PV.W,
+; EG-NEXT:     MOV T0.X, PV.X,
+; EG-NEXT:     AND_INT * T0.W, T20.Z, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T0.W, PV.W,
+; EG-NEXT:     AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV T8.X, PV.W,
+; EG-NEXT:     MOV * T0.X, T9.X,
+; EG-NEXT:     LSHR * T0.W, T20.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T0.W, PV.W,
+; EG-NEXT:     AND_INT * T1.W, T0.X, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BCNT_INT T12.Y, PS,
-; EG-NEXT:     AND_INT T0.Z, T0.Z, literal.x,
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T9.X, PV.W,
+; EG-NEXT:     MOV T0.X, PV.X,
+; EG-NEXT:     AND_INT * T0.W, T20.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
 ; EG-NEXT:     BCNT_INT T0.W, PV.W,
-; EG-NEXT:     AND_INT * T1.W, T12.X, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV T9.X, PV.W,
+; EG-NEXT:     MOV * T0.X, T12.X,
+; EG-NEXT:     LSHR * T1.W, T21.X, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T1.W, PV.W,
+; EG-NEXT:     AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHL * T1.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, T2.W, PV.W,
+; EG-NEXT:     MOV * T12.X, PV.W,
+; EG-NEXT:     MOV T0.X, PV.X,
+; EG-NEXT:     AND_INT * T1.W, T21.X, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T1.W, PV.W,
+; EG-NEXT:     AND_INT * T2.W, PV.X, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, PS, PV.W,
+; EG-NEXT:     MOV T12.X, PV.W,
+; EG-NEXT:     MOV * T0.X, T13.X,
+; EG-NEXT:     LSHR * T1.W, T21.Y, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T1.W, PV.W,
+; EG-NEXT:     AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHL * T1.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, T2.W, PV.W,
+; EG-NEXT:     MOV * T13.X, PV.W,
+; EG-NEXT:     MOV T0.X, PV.X,
+; EG-NEXT:     AND_INT * T1.W, T21.Y, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T1.W, PV.W,
+; EG-NEXT:     AND_INT * T2.W, PV.X, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T20.Y, PS, PV.W,
+; EG-NEXT:     MOV T13.X, PV.Y,
+; EG-NEXT:     MOV * T0.X, T16.X,
+; EG-NEXT:     LSHR * T1.W, T21.Z, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T1.W, PV.W,
+; EG-NEXT:     AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHL * T1.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, T2.W, PV.W,
+; EG-NEXT:    ALU clause starting at 131:
+; EG-NEXT:     MOV * T16.X, T1.W,
+; EG-NEXT:     MOV T0.X, PV.X,
+; EG-NEXT:     AND_INT * T1.W, T21.Z, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T1.W, PV.W,
+; EG-NEXT:     AND_INT * T2.W, PV.X, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, PS, PV.W,
+; EG-NEXT:     MOV T16.X, PV.W,
+; EG-NEXT:     MOV * T0.X, T17.X,
+; EG-NEXT:     LSHR * T1.W, T21.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BCNT_INT T1.W, PV.W,
+; EG-NEXT:     AND_INT * T2.W, T0.X, literal.x,
 ; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT:     BCNT_INT T12.X, PS,
-; EG-NEXT:     BCNT_INT T0.Z, PV.Z,
-; EG-NEXT:     LSHR T1.W, T0.X, literal.x,
-; EG-NEXT:     ADD_INT * T2.W, KC0[2].Y, literal.x,
+; EG-NEXT:     LSHL * T1.W, PV.W, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHR T13.X, PS, literal.x,
-; EG-NEXT:     BCNT_INT T0.Y, PV.W,
-; EG-NEXT:     AND_INT * T1.W, T0.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:     BCNT_INT T0.X, PV.W,
-; EG-NEXT:     LSHR * T14.X, KC0[2].Y, literal.x,
+; EG-NEXT:     OR_INT * T1.W, T2.W, PV.W,
+; EG-NEXT:     MOV * T17.X, PV.W,
+; EG-NEXT:     MOV T0.X, PV.X,
+; EG-NEXT:     AND_INT T1.W, T21.W, literal.x,
+; EG-NEXT:     LSHR * T21.X, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
+; EG-NEXT:     AND_INT T0.Z, PV.X, literal.x,
+; EG-NEXT:     BCNT_INT T1.W, PV.W,
+; EG-NEXT:     ADD_INT * T2.W, KC0[2].Y, literal.y,
+; EG-NEXT:    -65536(nan), 16(2.242078e-44)
+; EG-NEXT:     LSHR T22.X, PS, literal.x,
+; EG-NEXT:     OR_INT * T20.W, PV.Z, PV.W,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T17.X, PV.W,
+; EG-NEXT:     MOV * T0.X, T4.X,
+; EG-NEXT:     MOV * T0.Z, T8.X,
+; EG-NEXT:     MOV T20.X, T12.X,
+; EG-NEXT:     MOV * T20.Z, T16.X, BS:VEC_120/SCL_212
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <16 x i16>, ptr addrspace(1) %in, i32 %tid
   %val = load <16 x i16>, ptr addrspace(1) %in.gep, align 32
@@ -1292,7 +1512,7 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ; SI-NEXT:  .LBB14_4:
-; SI-NEXT:                    ; implicit-def: $vgpr0
+; SI-NEXT:    ; implicit-def: $vgpr0
 ; SI-NEXT:    s_branch .LBB14_2
 ;
 ; VI-LABEL: ctpop_i16_in_br:
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
index bad2e603f22e7..a2da8876472ab 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
@@ -1025,67 +1025,74 @@ define amdgpu_kernel void @v3i16_arg(ptr addrspace(1) nocapture %out, <3 x i16>
 ;
 ; EG-LABEL: v3i16_arg:
 ; EG:       ; %bb.0: ; %entry
-; EG-NEXT:    ALU 0, @10, KC0[], KC1[]
-; EG-NEXT:    TEX 1 @6
-; EG-NEXT:    ALU 14, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T3.X, 0
-; EG-NEXT:    MEM_RAT MSKOR T2.XW, T0.X
+; EG-NEXT:    ALU 0, @12, KC0[], KC1[]
+; EG-NEXT:    TEX 2 @6
+; EG-NEXT:    ALU 19, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
+; EG-NEXT:    MEM_RAT MSKOR T5.XW, T8.X
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_16 T1.X, T0.X, 44, #3
-; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 48, #3
-; EG-NEXT:    ALU clause starting at 10:
-; EG-NEXT:     MOV * T0.X, 0.0,
-; EG-NEXT:    ALU clause starting at 11:
+; EG-NEXT:     VTX_READ_16 T6.X, T5.X, 44, #3
+; EG-NEXT:     VTX_READ_16 T7.X, T5.X, 46, #3
+; EG-NEXT:     VTX_READ_16 T5.X, T5.X, 48, #3
+; EG-NEXT:    ALU clause starting at 12:
+; EG-NEXT:     MOV * T5.X, 0.0,
+; EG-NEXT:    ALU clause starting at 13:
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
 ; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
 ; EG-NEXT:     AND_INT T1.W, PV.W, literal.x,
-; EG-NEXT:     AND_INT * T2.W, T0.X, literal.y,
+; EG-NEXT:     AND_INT * T2.W, T5.X, literal.y,
 ; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
 ; EG-NEXT:     LSHL * T1.W, PV.W, literal.x,
 ; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
-; EG-NEXT:     LSHL T2.X, T2.W, PV.W,
-; EG-NEXT:     LSHL * T2.W, literal.x, PV.W,
+; EG-NEXT:     LSHL T5.X, T2.W, PV.W,
+; EG-NEXT:     LSHL * T5.W, literal.x, PV.W,
 ; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT:     MOV T2.Y, 0.0,
-; EG-NEXT:     MOV * T2.Z, 0.0,
-; EG-NEXT:     LSHR T0.X, T0.W, literal.x,
-; EG-NEXT:     LSHR * T3.X, KC0[2].Y, literal.x,
+; EG-NEXT:     MOV T5.Y, 0.0,
+; EG-NEXT:     MOV * T5.Z, 0.0,
+; EG-NEXT:     LSHR T8.X, T0.W, literal.x,
+; EG-NEXT:     LSHL T0.W, T7.X, literal.y,
+; EG-NEXT:     AND_INT * T1.W, T6.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT T6.X, PV.W, PS,
+; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 ;
 ; CM-LABEL: v3i16_arg:
 ; CM:       ; %bb.0: ; %entry
 ; CM-NEXT:    ALU 0, @12, KC0[], KC1[]
-; CM-NEXT:    TEX 0 @8
-; CM-NEXT:    ALU 13, @13, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    MEM_RAT MSKOR T1.XW, T2.X
-; CM-NEXT:    ALU 1, @27, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    TEX 0 @10
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT:    TEX 2 @6
+; CM-NEXT:    ALU 19, @13, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT MSKOR T5.XW, T8.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T6.X, T7.X
 ; CM-NEXT:    CF_END
-; CM-NEXT:    Fetch clause starting at 8:
-; CM-NEXT:     VTX_READ_16 T1.X, T0.X, 48, #3
-; CM-NEXT:    Fetch clause starting at 10:
-; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 44, #3
+; CM-NEXT:    Fetch clause starting at 6:
+; CM-NEXT:     VTX_READ_16 T6.X, T5.X, 44, #3
+; CM-NEXT:     VTX_READ_16 T7.X, T5.X, 46, #3
+; CM-NEXT:     VTX_READ_16 T5.X, T5.X, 48, #3
 ; CM-NEXT:    ALU clause starting at 12:
-; CM-NEXT:     MOV * T0.X, 0.0,
+; CM-NEXT:     MOV * T5.X, 0.0,
 ; CM-NEXT:    ALU clause starting at 13:
 ; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
 ; CM-NEXT:    4(5.605194e-45), 0(0.000000e+00)
 ; CM-NEXT:     AND_INT * T1.W, PV.W, literal.x,
 ; CM-NEXT:    3(4.203895e-45), 0(0.000000e+00)
-; CM-NEXT:     AND_INT T0.Z, T1.X, literal.x,
+; CM-NEXT:     AND_INT T0.Z, T5.X, literal.x,
 ; CM-NEXT:     LSHL * T1.W, PV.W, literal.y,
 ; CM-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
-; CM-NEXT:     LSHL T1.X, PV.Z, PV.W,
-; CM-NEXT:     LSHL * T1.W, literal.x, PV.W,
+; CM-NEXT:     LSHL T5.X, PV.Z, PV.W,
+; CM-NEXT:     LSHL * T5.W, literal.x, PV.W,
 ; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
-; CM-NEXT:     MOV T1.Y, 0.0,
-; CM-NEXT:     MOV * T1.Z, 0.0,
-; CM-NEXT:     LSHR * T2.X, T0.W, literal.x,
+; CM-NEXT:     MOV T5.Y, 0.0,
+; CM-NEXT:     MOV * T5.Z, 0.0,
+; CM-NEXT:     LSHL T0.Z, T7.X, literal.x,
+; CM-NEXT:     AND_INT * T1.W, T6.X, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T6.X, PV.Z, PV.W,
+; CM-NEXT:     LSHR * T7.X, KC0[2].Y, literal.x,
 ; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT:    ALU clause starting at 27:
-; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT:     LSHR * T8.X, T0.W, literal.x,
 ; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 entry:
   store <3 x i16> %in, ptr addrspace(1) %out, align 4
@@ -2669,47 +2676,205 @@ define amdgpu_kernel void @v8i16_arg(ptr addrspace(1) %out, <8 x i16> %in) {
 ;
 ; EG-LABEL: v8i16_arg:
 ; EG:       ; %bb.0: ; %entry
-; EG-NEXT:    ALU 0, @14, KC0[], KC1[]
-; EG-NEXT:    TEX 3 @6
-; EG-NEXT:    ALU 4, @15, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
+; EG-NEXT:    ALU 1, @36, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @20
+; EG-NEXT:    ALU 5, @38, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @22
+; EG-NEXT:    ALU 5, @44, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @24
+; EG-NEXT:    ALU 5, @50, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @26
+; EG-NEXT:    ALU 5, @56, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @28
+; EG-NEXT:    ALU 5, @62, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @30
+; EG-NEXT:    ALU 5, @68, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @32
+; EG-NEXT:    ALU 5, @74, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @34
+; EG-NEXT:    ALU 8, @80, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
-; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_16 T1.X, T0.X, 52, #3
-; EG-NEXT:     VTX_READ_16 T2.X, T0.X, 54, #3
-; EG-NEXT:     VTX_READ_16 T3.X, T0.X, 62, #3
-; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 60, #3
-; EG-NEXT:    ALU clause starting at 14:
-; EG-NEXT:     MOV * T0.X, 0.0,
-; EG-NEXT:    ALU clause starting at 15:
-; EG-NEXT:     MOV T1.Y, T2.X,
-; EG-NEXT:     MOV * T1.Z, T0.X, BS:VEC_120/SCL_212
-; EG-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
-; EG-NEXT:     MOV * T1.W, T3.X,
-; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:    Fetch clause starting at 20:
+; EG-NEXT:     VTX_READ_16 T8.X, T7.X, 66, #3
+; EG-NEXT:    Fetch clause starting at 22:
+; EG-NEXT:     VTX_READ_16 T8.X, T7.X, 58, #3
+; EG-NEXT:    Fetch clause starting at 24:
+; EG-NEXT:     VTX_READ_16 T8.X, T7.X, 64, #3
+; EG-NEXT:    Fetch clause starting at 26:
+; EG-NEXT:     VTX_READ_16 T8.X, T7.X, 56, #3
+; EG-NEXT:    Fetch clause starting at 28:
+; EG-NEXT:     VTX_READ_16 T8.X, T7.X, 62, #3
+; EG-NEXT:    Fetch clause starting at 30:
+; EG-NEXT:     VTX_READ_16 T8.X, T7.X, 54, #3
+; EG-NEXT:    Fetch clause starting at 32:
+; EG-NEXT:     VTX_READ_16 T8.X, T7.X, 60, #3
+; EG-NEXT:    Fetch clause starting at 34:
+; EG-NEXT:     VTX_READ_16 T7.X, T7.X, 52, #3
+; EG-NEXT:    ALU clause starting at 36:
+; EG-NEXT:     MOV * T0.Y, T3.X,
+; EG-NEXT:     MOV * T7.X, 0.0,
+; EG-NEXT:    ALU clause starting at 38:
+; EG-NEXT:     LSHL T0.W, T8.X, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV T3.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T5.X,
+; EG-NEXT:    ALU clause starting at 44:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     LSHL * T1.W, T8.X, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T5.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T3.X,
+; EG-NEXT:    ALU clause starting at 50:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T8.X, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T3.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T5.X,
+; EG-NEXT:    ALU clause starting at 56:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T8.X, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T5.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T2.X,
+; EG-NEXT:    ALU clause starting at 62:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     LSHL * T1.W, T8.X, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T2.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T4.X,
+; EG-NEXT:    ALU clause starting at 68:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     LSHL * T1.W, T8.X, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T2.X,
+; EG-NEXT:    ALU clause starting at 74:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T8.X, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T7.Z, PV.W, PS,
+; EG-NEXT:     MOV T2.X, PV.Z,
+; EG-NEXT:     MOV * T0.Y, T4.X,
+; EG-NEXT:    ALU clause starting at 80:
+; EG-NEXT:     LSHR T8.X, KC0[2].Y, literal.x,
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.y,
+; EG-NEXT:     AND_INT * T1.W, T7.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), -65536(nan)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T7.X, PV.W, PS,
+; EG-NEXT:     MOV T4.X, PV.X,
+; EG-NEXT:     MOV * T7.W, T3.X,
+; EG-NEXT:     MOV * T7.Y, T5.X,
 ;
 ; CM-LABEL: v8i16_arg:
 ; CM:       ; %bb.0: ; %entry
-; CM-NEXT:    ALU 0, @14, KC0[], KC1[]
-; CM-NEXT:    TEX 3 @6
-; CM-NEXT:    ALU 4, @15, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
+; CM-NEXT:    ALU 1, @36, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @20
+; CM-NEXT:    ALU 5, @38, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @22
+; CM-NEXT:    ALU 5, @44, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @24
+; CM-NEXT:    ALU 5, @50, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @26
+; CM-NEXT:    ALU 5, @56, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @28
+; CM-NEXT:    ALU 5, @62, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @30
+; CM-NEXT:    ALU 5, @68, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @32
+; CM-NEXT:    ALU 5, @74, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @34
+; CM-NEXT:    ALU 8, @80, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T7, T8.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    PAD
-; CM-NEXT:    Fetch clause starting at 6:
-; CM-NEXT:     VTX_READ_16 T1.X, T0.X, 52, #3
-; CM-NEXT:     VTX_READ_16 T2.X, T0.X, 54, #3
-; CM-NEXT:     VTX_READ_16 T3.X, T0.X, 62, #3
-; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 60, #3
-; CM-NEXT:    ALU clause starting at 14:
-; CM-NEXT:     MOV * T0.X, 0.0,
-; CM-NEXT:    ALU clause starting at 15:
-; CM-NEXT:     MOV T1.Y, T2.X,
-; CM-NEXT:     MOV * T1.Z, T0.X, BS:VEC_120/SCL_212
-; CM-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
-; CM-NEXT:     MOV * T1.W, T3.X,
-; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT:    Fetch clause starting at 20:
+; CM-NEXT:     VTX_READ_16 T8.X, T7.X, 66, #3
+; CM-NEXT:    Fetch clause starting at 22:
+; CM-NEXT:     VTX_READ_16 T8.X, T7.X, 58, #3
+; CM-NEXT:    Fetch clause starting at 24:
+; CM-NEXT:     VTX_READ_16 T8.X, T7.X, 64, #3
+; CM-NEXT:    Fetch clause starting at 26:
+; CM-NEXT:     VTX_READ_16 T8.X, T7.X, 56, #3
+; CM-NEXT:    Fetch clause starting at 28:
+; CM-NEXT:     VTX_READ_16 T8.X, T7.X, 62, #3
+; CM-NEXT:    Fetch clause starting at 30:
+; CM-NEXT:     VTX_READ_16 T8.X, T7.X, 54, #3
+; CM-NEXT:    Fetch clause starting at 32:
+; CM-NEXT:     VTX_READ_16 T8.X, T7.X, 60, #3
+; CM-NEXT:    Fetch clause starting at 34:
+; CM-NEXT:     VTX_READ_16 T7.X, T7.X, 52, #3
+; CM-NEXT:    ALU clause starting at 36:
+; CM-NEXT:     MOV * T0.Y, T3.X,
+; CM-NEXT:     MOV * T7.X, 0.0,
+; CM-NEXT:    ALU clause starting at 38:
+; CM-NEXT:     LSHL T0.Z, T8.X, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT:     MOV T3.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T5.X,
+; CM-NEXT:    ALU clause starting at 44:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, T8.X, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T5.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T3.X,
+; CM-NEXT:    ALU clause starting at 50:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T8.X, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T3.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T5.X,
+; CM-NEXT:    ALU clause starting at 56:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T8.X, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T5.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T2.X,
+; CM-NEXT:    ALU clause starting at 62:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, T8.X, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T2.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T4.X,
+; CM-NEXT:    ALU clause starting at 68:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, T8.X, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T4.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T2.X,
+; CM-NEXT:    ALU clause starting at 74:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T8.X, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T7.Z, PV.Z, PV.W,
+; CM-NEXT:     MOV T2.X, PV.Z,
+; CM-NEXT:     MOV * T0.Y, T4.X,
+; CM-NEXT:    ALU clause starting at 80:
+; CM-NEXT:     LSHR T8.X, KC0[2].Y, literal.x,
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.y,
+; CM-NEXT:     AND_INT * T0.W, T7.X, literal.z,
+; CM-NEXT:    2(2.802597e-45), -65536(nan)
+; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; CM-NEXT:     OR_INT * T7.X, PV.Z, PV.W,
+; CM-NEXT:     MOV T4.X, PV.X,
+; CM-NEXT:     MOV * T7.W, T3.X,
+; CM-NEXT:     MOV * T7.Y, T5.X,
 entry:
   store <8 x i16> %in, ptr addrspace(1) %out
   ret void
@@ -3453,68 +3618,392 @@ define amdgpu_kernel void @v16i16_arg(ptr addrspace(1) %out, <16 x i16> %in) {
 ;
 ; EG-LABEL: v16i16_arg:
 ; EG:       ; %bb.0: ; %entry
-; EG-NEXT:    ALU 0, @22, KC0[], KC1[]
-; EG-NEXT:    TEX 7 @6
-; EG-NEXT:    ALU 10, @23, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T0.X, 1
+; EG-NEXT:    ALU 1, @68, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @36
+; EG-NEXT:    ALU 5, @70, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @38
+; EG-NEXT:    ALU 5, @76, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @40
+; EG-NEXT:    ALU 5, @82, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @42
+; EG-NEXT:    ALU 5, @88, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @44
+; EG-NEXT:    ALU 5, @94, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @46
+; EG-NEXT:    ALU 5, @100, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @48
+; EG-NEXT:    ALU 5, @106, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @50
+; EG-NEXT:    ALU 5, @112, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @52
+; EG-NEXT:    ALU 5, @118, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @54
+; EG-NEXT:    ALU 5, @124, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @56
+; EG-NEXT:    ALU 5, @130, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @58
+; EG-NEXT:    ALU 5, @136, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @60
+; EG-NEXT:    ALU 5, @142, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @62
+; EG-NEXT:    ALU 5, @148, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @64
+; EG-NEXT:    ALU 5, @154, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @66
+; EG-NEXT:    ALU 13, @160, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T14.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T13.X, 1
 ; EG-NEXT:    CF_END
-; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_16 T1.X, T0.X, 84, #3
-; EG-NEXT:     VTX_READ_16 T2.X, T0.X, 86, #3
-; EG-NEXT:     VTX_READ_16 T3.X, T0.X, 94, #3
-; EG-NEXT:     VTX_READ_16 T4.X, T0.X, 78, #3
-; EG-NEXT:     VTX_READ_16 T5.X, T0.X, 76, #3
-; EG-NEXT:     VTX_READ_16 T6.X, T0.X, 92, #3
-; EG-NEXT:     VTX_READ_16 T7.X, T0.X, 68, #3
-; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 70, #3
-; EG-NEXT:    ALU clause starting at 22:
-; EG-NEXT:     MOV * T0.X, 0.0,
-; EG-NEXT:    ALU clause starting at 23:
-; EG-NEXT:     MOV T1.Y, T2.X,
-; EG-NEXT:     MOV * T7.Y, T0.X,
-; EG-NEXT:     MOV * T1.Z, T6.X,
-; EG-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
-; EG-NEXT:     MOV T7.Z, T5.X,
+; EG-NEXT:    Fetch clause starting at 36:
+; EG-NEXT:     VTX_READ_16 T12.X, T11.X, 98, #3
+; EG-NEXT:    Fetch clause starting at 38:
+; EG-NEXT:     VTX_READ_16 T12.X, T11.X, 90, #3
+; EG-NEXT:    Fetch clause starting at 40:
+; EG-NEXT:     VTX_READ_16 T12.X, T11.X, 82, #3
+; EG-NEXT:    Fetch clause starting at 42:
+; EG-NEXT:     VTX_READ_16 T12.X, T11.X, 74, #3
+; EG-NEXT:    Fetch clause starting at 44:
+; EG-NEXT:     VTX_READ_16 T12.X, T11.X, 96, #3
+; EG-NEXT:    Fetch clause starting at 46:
+; EG-NEXT:     VTX_READ_16 T12.X, T11.X, 88, #3
+; EG-NEXT:    Fetch clause starting at 48:
+; EG-NEXT:     VTX_READ_16 T12.X, T11.X, 80, #3
+; EG-NEXT:    Fetch clause starting at 50:
+; EG-NEXT:     VTX_READ_16 T12.X, T11.X, 72, #3
+; EG-NEXT:    Fetch clause starting at 52:
+; EG-NEXT:     VTX_READ_16 T12.X, T11.X, 94, #3
+; EG-NEXT:    Fetch clause starting at 54:
+; EG-NEXT:     VTX_READ_16 T12.X, T11.X, 86, #3
+; EG-NEXT:    Fetch clause starting at 56:
+; EG-NEXT:     VTX_READ_16 T12.X, T11.X, 78, #3
+; EG-NEXT:    Fetch clause starting at 58:
+; EG-NEXT:     VTX_READ_16 T12.X, T11.X, 70, #3
+; EG-NEXT:    Fetch clause starting at 60:
+; EG-NEXT:     VTX_READ_16 T12.X, T11.X, 92, #3
+; EG-NEXT:    Fetch clause starting at 62:
+; EG-NEXT:     VTX_READ_16 T12.X, T11.X, 84, #3
+; EG-NEXT:    Fetch clause starting at 64:
+; EG-NEXT:     VTX_READ_16 T13.X, T11.X, 76, #3
+; EG-NEXT:    Fetch clause starting at 66:
+; EG-NEXT:     VTX_READ_16 T11.X, T11.X, 68, #3
+; EG-NEXT:    ALU clause starting at 68:
+; EG-NEXT:     MOV * T0.Y, T3.X,
+; EG-NEXT:     MOV * T11.X, 0.0,
+; EG-NEXT:    ALU clause starting at 70:
+; EG-NEXT:     LSHL T0.W, T12.X, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV T3.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T5.X,
+; EG-NEXT:    ALU clause starting at 76:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T5.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T7.X,
+; EG-NEXT:    ALU clause starting at 82:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T7.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T9.X,
+; EG-NEXT:    ALU clause starting at 88:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T9.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T3.X,
+; EG-NEXT:    ALU clause starting at 94:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T12.X, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T3.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T5.X,
+; EG-NEXT:    ALU clause starting at 100:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T12.X, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T5.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T7.X,
+; EG-NEXT:    ALU clause starting at 106:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T12.X, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T7.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T9.X,
+; EG-NEXT:    ALU clause starting at 112:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T12.X, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T9.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T2.X,
+; EG-NEXT:    ALU clause starting at 118:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T2.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T4.X,
+; EG-NEXT:    ALU clause starting at 124:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T6.X,
+; EG-NEXT:    ALU clause starting at 130:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T6.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T8.X,
+; EG-NEXT:    ALU clause starting at 136:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T8.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T2.X,
+; EG-NEXT:    ALU clause starting at 142:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T12.X, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T12.Z, PV.W, PS,
+; EG-NEXT:     MOV T2.X, PV.Z,
+; EG-NEXT:     MOV * T0.Y, T4.X,
+; EG-NEXT:    ALU clause starting at 148:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T12.X, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T12.X, PV.W, PS,
+; EG-NEXT:     MOV T4.X, PV.X,
+; EG-NEXT:     MOV * T0.Y, T6.X,
+; EG-NEXT:    ALU clause starting at 154:
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T13.X, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T11.Z, PV.W, PS,
+; EG-NEXT:     MOV T6.X, PV.Z,
+; EG-NEXT:     MOV * T0.Y, T8.X,
+; EG-NEXT:    ALU clause starting at 160:
+; EG-NEXT:     LSHR T13.X, KC0[2].Y, literal.x,
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
 ; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
-; EG-NEXT:     LSHR T2.X, PV.W, literal.x,
-; EG-NEXT:     MOV T7.W, T4.X,
-; EG-NEXT:     MOV * T1.W, T3.X,
-; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     LSHR T14.X, PV.W, literal.x,
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.y,
+; EG-NEXT:     AND_INT * T1.W, T11.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), -65536(nan)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T11.X, PV.W, PS,
+; EG-NEXT:     MOV T8.X, PV.X,
+; EG-NEXT:     MOV * T12.W, T3.X,
+; EG-NEXT:     MOV T12.Y, T5.X,
+; EG-NEXT:     MOV T11.W, T7.X, BS:VEC_120/SCL_212
+; EG-NEXT:     MOV * T11.Y, T9.X,
 ;
 ; CM-LABEL: v16i16_arg:
 ; CM:       ; %bb.0: ; %entry
-; CM-NEXT:    ALU 0, @22, KC0[], KC1[]
-; CM-NEXT:    TEX 7 @6
-; CM-NEXT:    ALU 11, @23, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T7, T2.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
+; CM-NEXT:    ALU 1, @68, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @36
+; CM-NEXT:    ALU 5, @70, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @38
+; CM-NEXT:    ALU 5, @76, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @40
+; CM-NEXT:    ALU 5, @82, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @42
+; CM-NEXT:    ALU 5, @88, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @44
+; CM-NEXT:    ALU 5, @94, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @46
+; CM-NEXT:    ALU 5, @100, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @48
+; CM-NEXT:    ALU 5, @106, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @50
+; CM-NEXT:    ALU 5, @112, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @52
+; CM-NEXT:    ALU 5, @118, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @54
+; CM-NEXT:    ALU 5, @124, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @56
+; CM-NEXT:    ALU 5, @130, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @58
+; CM-NEXT:    ALU 5, @136, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @60
+; CM-NEXT:    ALU 5, @142, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @62
+; CM-NEXT:    ALU 5, @148, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @64
+; CM-NEXT:    ALU 5, @154, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @66
+; CM-NEXT:    ALU 14, @160, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T11, T14.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T12, T13.X
 ; CM-NEXT:    CF_END
-; CM-NEXT:    Fetch clause starting at 6:
-; CM-NEXT:     VTX_READ_16 T1.X, T0.X, 84, #3
-; CM-NEXT:     VTX_READ_16 T2.X, T0.X, 86, #3
-; CM-NEXT:     VTX_READ_16 T3.X, T0.X, 78, #3
-; CM-NEXT:     VTX_READ_16 T4.X, T0.X, 94, #3
-; CM-NEXT:     VTX_READ_16 T5.X, T0.X, 76, #3
-; CM-NEXT:     VTX_READ_16 T6.X, T0.X, 92, #3
-; CM-NEXT:     VTX_READ_16 T7.X, T0.X, 68, #3
-; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 70, #3
-; CM-NEXT:    ALU clause starting at 22:
-; CM-NEXT:     MOV * T0.X, 0.0,
-; CM-NEXT:    ALU clause starting at 23:
-; CM-NEXT:     MOV * T1.Y, T2.X,
-; CM-NEXT:     MOV T7.Y, T0.X,
-; CM-NEXT:     MOV T1.Z, T6.X, BS:VEC_120/SCL_212
+; CM-NEXT:    Fetch clause starting at 36:
+; CM-NEXT:     VTX_READ_16 T12.X, T11.X, 98, #3
+; CM-NEXT:    Fetch clause starting at 38:
+; CM-NEXT:     VTX_READ_16 T12.X, T11.X, 90, #3
+; CM-NEXT:    Fetch clause starting at 40:
+; CM-NEXT:     VTX_READ_16 T12.X, T11.X, 82, #3
+; CM-NEXT:    Fetch clause starting at 42:
+; CM-NEXT:     VTX_READ_16 T12.X, T11.X, 74, #3
+; CM-NEXT:    Fetch clause starting at 44:
+; CM-NEXT:     VTX_READ_16 T12.X, T11.X, 96, #3
+; CM-NEXT:    Fetch clause starting at 46:
+; CM-NEXT:     VTX_READ_16 T12.X, T11.X, 88, #3
+; CM-NEXT:    Fetch clause starting at 48:
+; CM-NEXT:     VTX_READ_16 T12.X, T11.X, 80, #3
+; CM-NEXT:    Fetch clause starting at 50:
+; CM-NEXT:     VTX_READ_16 T12.X, T11.X, 72, #3
+; CM-NEXT:    Fetch clause starting at 52:
+; CM-NEXT:     VTX_READ_16 T12.X, T11.X, 94, #3
+; CM-NEXT:    Fetch clause starting at 54:
+; CM-NEXT:     VTX_READ_16 T12.X, T11.X, 86, #3
+; CM-NEXT:    Fetch clause starting at 56:
+; CM-NEXT:     VTX_READ_16 T12.X, T11.X, 78, #3
+; CM-NEXT:    Fetch clause starting at 58:
+; CM-NEXT:     VTX_READ_16 T12.X, T11.X, 70, #3
+; CM-NEXT:    Fetch clause starting at 60:
+; CM-NEXT:     VTX_READ_16 T12.X, T11.X, 92, #3
+; CM-NEXT:    Fetch clause starting at 62:
+; CM-NEXT:     VTX_READ_16 T12.X, T11.X, 84, #3
+; CM-NEXT:    Fetch clause starting at 64:
+; CM-NEXT:     VTX_READ_16 T13.X, T11.X, 76, #3
+; CM-NEXT:    Fetch clause starting at 66:
+; CM-NEXT:     VTX_READ_16 T11.X, T11.X, 68, #3
+; CM-NEXT:    ALU clause starting at 68:
+; CM-NEXT:     MOV * T0.Y, T3.X,
+; CM-NEXT:     MOV * T11.X, 0.0,
+; CM-NEXT:    ALU clause starting at 70:
+; CM-NEXT:     LSHL T0.Z, T12.X, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT:     MOV T3.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T5.X,
+; CM-NEXT:    ALU clause starting at 76:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T5.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T7.X,
+; CM-NEXT:    ALU clause starting at 82:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T7.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T9.X,
+; CM-NEXT:    ALU clause starting at 88:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T9.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T3.X,
+; CM-NEXT:    ALU clause starting at 94:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T12.X, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T3.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T5.X,
+; CM-NEXT:    ALU clause starting at 100:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T12.X, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T5.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T7.X,
+; CM-NEXT:    ALU clause starting at 106:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T12.X, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T7.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T9.X,
+; CM-NEXT:    ALU clause starting at 112:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T12.X, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T9.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T2.X,
+; CM-NEXT:    ALU clause starting at 118:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T2.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T4.X,
+; CM-NEXT:    ALU clause starting at 124:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T4.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T6.X,
+; CM-NEXT:    ALU clause starting at 130:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T6.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T8.X,
+; CM-NEXT:    ALU clause starting at 136:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T8.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T2.X,
+; CM-NEXT:    ALU clause starting at 142:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T12.X, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T12.Z, PV.Z, PV.W,
+; CM-NEXT:     MOV T2.X, PV.Z,
+; CM-NEXT:     MOV * T0.Y, T4.X,
+; CM-NEXT:    ALU clause starting at 148:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T12.X, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T12.X, PV.Z, PV.W,
+; CM-NEXT:     MOV T4.X, PV.X,
+; CM-NEXT:     MOV * T0.Y, T6.X,
+; CM-NEXT:    ALU clause starting at 154:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T13.X, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T11.Z, PV.Z, PV.W,
+; CM-NEXT:     MOV T6.X, PV.Z,
+; CM-NEXT:     MOV * T0.Y, T8.X,
+; CM-NEXT:    ALU clause starting at 160:
 ; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
 ; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     LSHR T0.X, PV.W, literal.x,
-; CM-NEXT:     MOV T7.Z, T5.X,
-; CM-NEXT:     MOV * T1.W, T4.X, BS:VEC_120/SCL_212
-; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT:     LSHR T2.X, KC0[2].Y, literal.x,
-; CM-NEXT:     MOV * T7.W, T3.X,
+; CM-NEXT:     LSHR * T13.X, PV.W, literal.x,
 ; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT:     LSHR T14.X, KC0[2].Y, literal.x,
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.y,
+; CM-NEXT:     AND_INT * T0.W, T11.X, literal.z,
+; CM-NEXT:    2(2.802597e-45), -65536(nan)
+; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; CM-NEXT:     OR_INT * T11.X, PV.Z, PV.W,
+; CM-NEXT:     MOV T8.X, PV.X,
+; CM-NEXT:     MOV * T12.W, T3.X,
+; CM-NEXT:     MOV T12.Y, T5.X,
+; CM-NEXT:     MOV * T11.W, T7.X, BS:VEC_120/SCL_212
+; CM-NEXT:     MOV * T11.Y, T9.X,
 entry:
   store <16 x i16> %in, ptr addrspace(1) %out
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
index 4491c4b766db9..8c8dd83c7a4bf 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
@@ -232,32 +232,38 @@ define amdgpu_kernel void @constant_load_v3i16(ptr addrspace(1) %out, ptr addrsp
 ;
 ; EG-LABEL: constant_load_v3i16:
 ; EG:       ; %bb.0: ; %entry
-; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 1 @6
-; EG-NEXT:    ALU 14, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T3.X, 0
-; EG-NEXT:    MEM_RAT MSKOR T2.XW, T0.X
+; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 2 @6
+; EG-NEXT:    ALU 19, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
+; EG-NEXT:    MEM_RAT MSKOR T5.XW, T8.X
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_16 T1.X, T0.X, 0, #1
-; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 4, #1
-; EG-NEXT:    ALU clause starting at 10:
-; EG-NEXT:     MOV * T0.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 11:
+; EG-NEXT:     VTX_READ_16 T6.X, T5.X, 0, #1
+; EG-NEXT:     VTX_READ_16 T7.X, T5.X, 2, #1
+; EG-NEXT:     VTX_READ_16 T5.X, T5.X, 4, #1
+; EG-NEXT:    ALU clause starting at 12:
+; EG-NEXT:     MOV * T5.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 13:
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
 ; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
 ; EG-NEXT:     AND_INT T1.W, PV.W, literal.x,
-; EG-NEXT:     AND_INT * T2.W, T0.X, literal.y,
+; EG-NEXT:     AND_INT * T2.W, T5.X, literal.y,
 ; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
 ; EG-NEXT:     LSHL * T1.W, PV.W, literal.x,
 ; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
-; EG-NEXT:     LSHL T2.X, T2.W, PV.W,
-; EG-NEXT:     LSHL * T2.W, literal.x, PV.W,
+; EG-NEXT:     LSHL T5.X, T2.W, PV.W,
+; EG-NEXT:     LSHL * T5.W, literal.x, PV.W,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     MOV T5.Y, 0.0,
+; EG-NEXT:     MOV * T5.Z, 0.0,
+; EG-NEXT:     LSHR T8.X, T0.W, literal.x,
+; EG-NEXT:     LSHL T0.W, T7.X, literal.y,
+; EG-NEXT:     AND_INT * T1.W, T6.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
 ; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT:     MOV T2.Y, 0.0,
-; EG-NEXT:     MOV * T2.Z, 0.0,
-; EG-NEXT:     LSHR T0.X, T0.W, literal.x,
-; EG-NEXT:     LSHR * T3.X, KC0[2].Y, literal.x,
+; EG-NEXT:     OR_INT T6.X, PV.W, PS,
+; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 ;
 ; GFX12-LABEL: constant_load_v3i16:
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
index b39b38a420233..5c4bc95578bb4 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
@@ -9832,24 +9832,50 @@ define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(ptr addrspace(1) %out
 ;
 ; EG-LABEL: constant_zextload_v4i8_to_v4i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 6, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1
+; EG-NEXT:    ALU 31, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XY, T7.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
+; EG-NEXT:     VTX_READ_32 T7.X, T7.X, 0, #1
 ; EG-NEXT:    ALU clause starting at 8:
-; EG-NEXT:     MOV * T4.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     MOV * T0.Y, T4.X,
+; EG-NEXT:     MOV * T7.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     AND_INT T0.W, T7.X, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    255(3.573311e-43), -65536(nan)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T0.W, T7.X, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T5.X,
 ; EG-NEXT:     MOV * T0.W, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T4.Y, T4.X, literal.x, PV.W,
+; EG-NEXT:     BFE_UINT T0.W, T7.X, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    16(2.242078e-44), -65536(nan)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T7.X, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T4.X, T4.X, literal.x,
-; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
-; EG-NEXT:    255(3.573311e-43), 2(2.802597e-45)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
+; EG-NEXT:     OR_INT * T8.Y, PV.W, PS,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T5.X, PV.Y,
+; EG-NEXT:     MOV * T8.X, T4.X,
 ;
 ; GFX12-LABEL: constant_zextload_v4i8_to_v4i16:
 ; GFX12:       ; %bb.0:
@@ -9951,23 +9977,56 @@ define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(ptr addrspace(1) %out
 ;
 ; EG-LABEL: constant_sextload_v4i8_to_v4i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 5, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1
+; EG-NEXT:    ALU 37, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XY, T7.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
+; EG-NEXT:     VTX_READ_32 T7.X, T7.X, 0, #1
 ; EG-NEXT:    ALU clause starting at 8:
-; EG-NEXT:     MOV * T4.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 9:
-; EG-NEXT:     BFE_INT T5.X, T4.X, 0.0, literal.x,
-; EG-NEXT:     LSHR T0.W, T4.X, literal.x,
-; EG-NEXT:     LSHR * T4.X, KC0[2].Y, literal.y,
-; EG-NEXT:    8(1.121039e-44), 2(2.802597e-45)
-; EG-NEXT:     BFE_INT * T5.Y, PV.W, 0.0, literal.x,
+; EG-NEXT:     MOV * T0.Y, T4.X,
+; EG-NEXT:     MOV * T7.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     BFE_INT * T0.W, T7.X, 0.0, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), -65536(nan)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T7.X, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T5.X,
+; EG-NEXT:     LSHR * T0.W, T7.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T7.X, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
+; EG-NEXT:     OR_INT * T8.Y, PV.W, PS,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T5.X, PV.Y,
+; EG-NEXT:     MOV * T8.X, T4.X,
 ;
 ; GFX12-LABEL: constant_sextload_v4i8_to_v4i16:
 ; GFX12:       ; %bb.0:
@@ -10088,27 +10147,80 @@ define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(ptr addrspace(1) %out
 ;
 ; EG-LABEL: constant_zextload_v8i8_to_v8i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 9, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1
+; EG-NEXT:    ALU 61, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T11.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
+; EG-NEXT:     VTX_READ_64 T11.XY, T11.X, 0, #1
 ; EG-NEXT:    ALU clause starting at 8:
-; EG-NEXT:     MOV * T5.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     MOV * T0.Y, T8.X,
+; EG-NEXT:     MOV * T11.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     AND_INT T0.W, T11.X, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    255(3.573311e-43), -65536(nan)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T0.W, T11.X, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T9.X,
 ; EG-NEXT:     MOV * T0.W, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T6.W, T5.Y, literal.x, PV.W,
+; EG-NEXT:     BFE_UINT T1.W, T11.X, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.y,
+; EG-NEXT:    16(2.242078e-44), -65536(nan)
+; EG-NEXT:     OR_INT * T1.W, PS, PV.W,
+; EG-NEXT:     MOV * T9.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T11.X, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T12.Y, PV.W, PS,
+; EG-NEXT:     MOV T9.X, PV.Y,
+; EG-NEXT:     MOV * T0.Y, T4.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T11.Y, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T11.Y, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T6.Y, T5.X, literal.x, T0.W,
-; EG-NEXT:     AND_INT * T6.Z, T5.Y, literal.y,
-; EG-NEXT:    8(1.121039e-44), 255(3.573311e-43)
-; EG-NEXT:     AND_INT T6.X, T5.X, literal.x,
-; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
-; EG-NEXT:    255(3.573311e-43), 2(2.802597e-45)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T5.X,
+; EG-NEXT:     BFE_UINT * T0.W, T11.Y, literal.x, T0.W,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, PV.W, T0.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     LSHR T11.X, KC0[2].Y, literal.x,
+; EG-NEXT:     OR_INT * T12.W, PV.W, PS,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T5.X, PV.W,
+; EG-NEXT:     MOV * T12.X, T8.X,
+; EG-NEXT:     MOV * T12.Z, T4.X,
 ;
 ; GFX12-LABEL: constant_zextload_v8i8_to_v8i16:
 ; GFX12:       ; %bb.0:
@@ -10255,28 +10367,93 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(ptr addrspace(1) %out
 ;
 ; EG-LABEL: constant_sextload_v8i8_to_v8i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 10, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1
+; EG-NEXT:    ALU 74, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T11.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
+; EG-NEXT:     VTX_READ_64 T11.XY, T11.X, 0, #1
 ; EG-NEXT:    ALU clause starting at 8:
-; EG-NEXT:     MOV * T5.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 9:
-; EG-NEXT:     BFE_INT * T6.Z, T5.Y, 0.0, literal.x,
+; EG-NEXT:     MOV * T0.Y, T8.X,
+; EG-NEXT:     MOV * T11.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     BFE_INT * T0.W, T11.X, 0.0, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_INT T6.X, T5.X, 0.0, literal.x,
-; EG-NEXT:     LSHR * T0.W, T5.Y, literal.x,
+; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), -65536(nan)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T11.X, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_INT T6.W, PV.W, 0.0, literal.x,
-; EG-NEXT:     LSHR * T0.W, T5.X, literal.x,
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T9.X,
+; EG-NEXT:     LSHR * T0.W, T11.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T9.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T11.X, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T12.Y, PV.W, PS,
+; EG-NEXT:     MOV T9.X, PV.Y,
+; EG-NEXT:     MOV T0.Y, T4.X,
+; EG-NEXT:     BFE_INT * T0.W, T11.Y, 0.0, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHR T5.X, KC0[2].Y, literal.x,
-; EG-NEXT:     BFE_INT * T6.Y, PS, 0.0, literal.y,
-; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T5.X,
+; EG-NEXT:     LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T11.Y, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     LSHR T11.X, KC0[2].Y, literal.x,
+; EG-NEXT:     OR_INT * T12.W, PV.W, PS,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T5.X, PV.W,
+; EG-NEXT:     MOV * T12.X, T8.X,
+; EG-NEXT:     MOV * T12.Z, T4.X,
 ;
 ; GFX12-LABEL: constant_sextload_v8i8_to_v8i16:
 ; GFX12:       ; %bb.0:
@@ -10472,37 +10649,146 @@ define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(ptr addrspace(1) %o
 ;
 ; EG-LABEL: constant_zextload_v16i8_to_v16i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 19, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
+; EG-NEXT:    ALU 1, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @8
+; EG-NEXT:    ALU 103, @12, KC0[], KC1[]
+; EG-NEXT:    ALU 20, @116, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T22.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T21.X, 1
 ; EG-NEXT:    CF_END
-; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
-; EG-NEXT:    ALU clause starting at 8:
-; EG-NEXT:     MOV * T7.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 8:
+; EG-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     MOV * T0.Y, T16.X,
+; EG-NEXT:     MOV * T19.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 12:
+; EG-NEXT:     AND_INT T0.W, T19.X, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    255(3.573311e-43), -65536(nan)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T16.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T0.W, T19.X, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T16.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T17.X,
 ; EG-NEXT:     MOV * T0.W, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T8.W, T7.Y, literal.x, PV.W,
+; EG-NEXT:     BFE_UINT T1.W, T19.X, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.y,
+; EG-NEXT:    16(2.242078e-44), -65536(nan)
+; EG-NEXT:     OR_INT * T1.W, PS, PV.W,
+; EG-NEXT:     MOV * T17.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T19.X, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T8.Y, T7.X, literal.x, T0.W,
-; EG-NEXT:     AND_INT T8.Z, T7.Y, literal.y,
-; EG-NEXT:     BFE_UINT * T9.W, T7.W, literal.x, T0.W,
-; EG-NEXT:    8(1.121039e-44), 255(3.573311e-43)
-; EG-NEXT:     AND_INT T8.X, T7.X, literal.x,
-; EG-NEXT:     BFE_UINT T9.Y, T7.Z, literal.y, T0.W,
-; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.z,
-; EG-NEXT:    255(3.573311e-43), 8(1.121039e-44)
-; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT:     AND_INT * T9.Z, T7.W, literal.x,
-; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T9.X, T7.Z, literal.x,
-; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT:    255(3.573311e-43), 16(2.242078e-44)
-; EG-NEXT:     LSHR * T10.X, PV.W, literal.x,
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T20.Y, PV.W, PS,
+; EG-NEXT:     MOV T17.X, PV.Y,
+; EG-NEXT:     MOV * T0.Y, T12.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T19.Y, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T12.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T19.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T12.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T13.X,
+; EG-NEXT:     BFE_UINT * T1.W, T19.Y, literal.x, T0.W,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT:     MOV * T13.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T19.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T20.W, PV.W, PS,
+; EG-NEXT:     MOV T13.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T8.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T19.Z, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T19.Z, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T9.X,
+; EG-NEXT:     BFE_UINT * T1.W, T19.Z, literal.x, T0.W,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT:     MOV * T9.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T19.Z, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T19.Y, PV.W, PS,
+; EG-NEXT:     MOV T9.X, PV.Y,
+; EG-NEXT:     MOV * T0.Y, T4.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T19.W, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T19.W, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T5.X,
+; EG-NEXT:     BFE_UINT * T0.W, T19.W, literal.x, T0.W,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:    ALU clause starting at 116:
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, PV.W, T0.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR T0.W, T19.W, literal.x,
+; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 16(2.242078e-44)
+; EG-NEXT:     LSHR T21.X, PS, literal.x,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.y,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.z,
+; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT:    16711680(2.341805e-38), 0(0.000000e+00)
+; EG-NEXT:     LSHR T22.X, KC0[2].Y, literal.x,
+; EG-NEXT:     OR_INT * T19.W, PV.W, PS,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T5.X, PV.W,
+; EG-NEXT:     MOV * T20.X, T16.X,
+; EG-NEXT:     MOV * T20.Z, T12.X,
+; EG-NEXT:     MOV T19.X, T8.X,
+; EG-NEXT:     MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
 ;
 ; GFX12-LABEL: constant_zextload_v16i8_to_v16i16:
 ; GFX12:       ; %bb.0:
@@ -10753,38 +11039,173 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(ptr addrspace(1) %o
 ;
 ; EG-LABEL: constant_sextload_v16i8_to_v16i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 20, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
+; EG-NEXT:    ALU 1, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @8
+; EG-NEXT:    ALU 104, @12, KC0[], KC1[]
+; EG-NEXT:    ALU 46, @117, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T22.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T21.X, 1
 ; EG-NEXT:    CF_END
-; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
-; EG-NEXT:    ALU clause starting at 8:
-; EG-NEXT:     MOV * T7.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 9:
-; EG-NEXT:     BFE_INT * T8.Z, T7.Y, 0.0, literal.x,
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 8:
+; EG-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     MOV * T0.Y, T16.X,
+; EG-NEXT:     MOV * T19.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 12:
+; EG-NEXT:     BFE_INT * T0.W, T19.X, 0.0, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), -65536(nan)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T16.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T19.X, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_INT T8.X, T7.X, 0.0, literal.x,
-; EG-NEXT:     BFE_INT T9.Z, T7.W, 0.0, literal.x,
-; EG-NEXT:     LSHR * T0.W, T7.Y, literal.x,
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T16.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T17.X,
+; EG-NEXT:     LSHR * T0.W, T19.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T17.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T19.X, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T20.Y, PV.W, PS,
+; EG-NEXT:     MOV T17.X, PV.Y,
+; EG-NEXT:     MOV T0.Y, T12.X,
+; EG-NEXT:     BFE_INT * T0.W, T19.Y, 0.0, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_INT T9.X, T7.Z, 0.0, literal.x,
-; EG-NEXT:     LSHR T0.Z, T7.W, literal.x,
-; EG-NEXT:     BFE_INT T8.W, PV.W, 0.0, literal.x,
-; EG-NEXT:     LSHR * T0.W, T7.X, literal.x,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T12.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T19.Y, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
-; EG-NEXT:     BFE_INT T8.Y, PS, 0.0, literal.y,
-; EG-NEXT:     LSHR T1.Z, T7.Z, literal.y,
-; EG-NEXT:     BFE_INT T9.W, PV.Z, 0.0, literal.y,
-; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
-; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHR T10.X, PS, literal.x,
-; EG-NEXT:     BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
-; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T12.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T13.X,
+; EG-NEXT:     LSHR * T0.W, T19.Y, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T13.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T19.Y, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T20.W, PV.W, PS,
+; EG-NEXT:     MOV T13.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T8.X,
+; EG-NEXT:     BFE_INT * T0.W, T19.Z, 0.0, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T19.Z, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T9.X,
+; EG-NEXT:     LSHR * T0.W, T19.Z, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T9.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T19.Z, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:    ALU clause starting at 117:
+; EG-NEXT:     OR_INT * T19.Y, T1.W, T0.W,
+; EG-NEXT:     MOV T9.X, PV.Y,
+; EG-NEXT:     MOV T0.Y, T4.X,
+; EG-NEXT:     BFE_INT * T0.W, T19.W, 0.0, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T19.W, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T5.X,
+; EG-NEXT:     LSHR * T0.W, T19.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR T0.W, T19.W, literal.x,
+; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT:    24(3.363116e-44), 16(2.242078e-44)
+; EG-NEXT:     LSHR T21.X, PS, literal.x,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.y,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.z,
+; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHR T22.X, KC0[2].Y, literal.x,
+; EG-NEXT:     OR_INT * T19.W, PV.W, PS,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T5.X, PV.W,
+; EG-NEXT:     MOV * T20.X, T16.X,
+; EG-NEXT:     MOV * T20.Z, T12.X,
+; EG-NEXT:     MOV T19.X, T8.X,
+; EG-NEXT:     MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
 ;
 ; GFX12-LABEL: constant_sextload_v16i8_to_v16i16:
 ; GFX12:       ; %bb.0:
@@ -11132,58 +11553,276 @@ define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(ptr addrspace(1) %o
 ;
 ; EG-LABEL: constant_zextload_v32i8_to_v32i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 1 @8
-; EG-NEXT:    ALU 37, @13, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T12.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 1
+; EG-NEXT:    ALU 1, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 1 @10
+; EG-NEXT:    ALU 103, @16, KC0[], KC1[]
+; EG-NEXT:    ALU 104, @120, KC0[], KC1[]
+; EG-NEXT:    ALU 41, @225, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T42.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 1
 ; EG-NEXT:    CF_END
-; EG-NEXT:    Fetch clause starting at 8:
-; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
-; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
-; EG-NEXT:    ALU clause starting at 12:
-; EG-NEXT:     MOV * T11.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 13:
+; EG-NEXT:    Fetch clause starting at 10:
+; EG-NEXT:     VTX_READ_128 T37.XYZW, T35.X, 16, #1
+; EG-NEXT:     VTX_READ_128 T35.XYZW, T35.X, 0, #1
+; EG-NEXT:    ALU clause starting at 14:
+; EG-NEXT:     MOV * T0.Y, T16.X,
+; EG-NEXT:     MOV * T35.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 16:
+; EG-NEXT:     AND_INT T0.W, T37.X, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    255(3.573311e-43), -65536(nan)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T16.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T0.W, T37.X, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T16.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T17.X,
 ; EG-NEXT:     MOV * T0.W, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T13.W, T11.Y, literal.x, PV.W,
+; EG-NEXT:     BFE_UINT T1.W, T37.X, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.y,
+; EG-NEXT:    16(2.242078e-44), -65536(nan)
+; EG-NEXT:     OR_INT * T1.W, PS, PV.W,
+; EG-NEXT:     MOV * T17.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T37.X, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T13.Y, T11.X, literal.x, T0.W,
-; EG-NEXT:     AND_INT T13.Z, T11.Y, literal.y,
-; EG-NEXT:     BFE_UINT * T14.W, T11.W, literal.x, T0.W,
-; EG-NEXT:    8(1.121039e-44), 255(3.573311e-43)
-; EG-NEXT:     AND_INT T13.X, T11.X, literal.x,
-; EG-NEXT:     BFE_UINT T14.Y, T11.Z, literal.y, T0.W,
-; EG-NEXT:     LSHR * T11.X, KC0[2].Y, literal.z,
-; EG-NEXT:    255(3.573311e-43), 8(1.121039e-44)
-; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T14.Z, T11.W, literal.x,
-; EG-NEXT:     BFE_UINT * T15.W, T12.Y, literal.y, T0.W,
-; EG-NEXT:    255(3.573311e-43), 8(1.121039e-44)
-; EG-NEXT:     AND_INT T14.X, T11.Z, literal.x,
-; EG-NEXT:     BFE_UINT T15.Y, T12.X, literal.y, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; EG-NEXT:    255(3.573311e-43), 8(1.121039e-44)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T36.Y, PV.W, PS,
+; EG-NEXT:     MOV T17.X, PV.Y,
+; EG-NEXT:     MOV * T0.Y, T12.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T37.Y, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T12.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T37.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T12.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T13.X,
+; EG-NEXT:     BFE_UINT * T1.W, T37.Y, literal.x, T0.W,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T15.Z, T12.Y, literal.y,
-; EG-NEXT:     BFE_UINT T17.W, T12.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T15.X, T12.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 255(3.573311e-43)
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT:     MOV * T13.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T37.Y, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T17.Y, T12.Z, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT:    8(1.121039e-44), 32(4.484155e-44)
-; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T17.Z, T12.W, literal.y,
-; EG-NEXT:     AND_INT * T17.X, T12.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 255(3.573311e-43)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T36.W, PV.W, PS,
+; EG-NEXT:     MOV T13.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T8.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T37.Z, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T37.Z, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T9.X,
+; EG-NEXT:     BFE_UINT * T1.W, T37.Z, literal.x, T0.W,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT:     MOV * T9.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T37.Z, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T37.Y, PV.W, PS,
+; EG-NEXT:     MOV T9.X, PV.Y,
+; EG-NEXT:     MOV * T0.Y, T4.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T37.W, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T37.W, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T5.X,
+; EG-NEXT:     BFE_UINT * T1.W, T37.W, literal.x, T0.W,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:    ALU clause starting at 120:
+; EG-NEXT:     AND_INT * T2.W, T0.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T37.W, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T37.W, PV.W, PS,
+; EG-NEXT:     MOV T5.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T32.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T35.X, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T32.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T35.X, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T32.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T33.X,
+; EG-NEXT:     BFE_UINT * T1.W, T35.X, literal.x, T0.W, BS:VEC_120/SCL_212
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT:     MOV * T33.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T35.X, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T38.Y, PV.W, PS,
+; EG-NEXT:     MOV T33.X, PV.Y,
+; EG-NEXT:     MOV * T0.Y, T28.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T35.Y, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T28.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T35.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T28.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T29.X,
+; EG-NEXT:     BFE_UINT * T1.W, T35.Y, literal.x, T0.W,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT:     MOV * T29.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T35.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T38.W, PV.W, PS,
+; EG-NEXT:     MOV T29.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T24.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T35.Z, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T24.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T35.Z, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T24.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T25.X,
+; EG-NEXT:     BFE_UINT * T1.W, T35.Z, literal.x, T0.W,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT:     MOV * T25.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T35.Z, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T35.Y, PV.W, PS,
+; EG-NEXT:     MOV T25.X, PV.Y,
+; EG-NEXT:     MOV * T0.Y, T20.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T35.W, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T20.X, PV.W,
+; EG-NEXT:    ALU clause starting at 225:
+; EG-NEXT:     MOV T0.Y, T20.X,
+; EG-NEXT:     LSHL * T1.W, T35.W, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T20.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T21.X,
+; EG-NEXT:     BFE_UINT * T0.W, T35.W, literal.x, T0.W,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, PV.W, T0.W,
+; EG-NEXT:     MOV * T21.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
-; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHR * T18.X, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
+; EG-NEXT:     LSHR * T40.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     LSHR T0.W, T35.W, literal.x,
+; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 48(6.726233e-44)
+; EG-NEXT:     LSHR T41.X, PS, literal.x,
+; EG-NEXT:     AND_INT T0.Z, T0.Y, literal.y,
+; EG-NEXT:     AND_INT T0.W, PV.W, literal.z,
+; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.w,
+; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT:    16711680(2.341805e-38), 32(4.484155e-44)
+; EG-NEXT:     LSHR T42.X, PS, literal.x,
+; EG-NEXT:     OR_INT * T35.W, PV.Z, PV.W,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T21.X, PV.W,
+; EG-NEXT:     MOV * T36.X, T16.X,
+; EG-NEXT:     MOV * T36.Z, T12.X,
+; EG-NEXT:     MOV T37.X, T8.X,
+; EG-NEXT:     MOV T37.Z, T4.X, BS:VEC_120/SCL_212
+; EG-NEXT:     MOV * T38.X, T32.X,
+; EG-NEXT:     MOV * T38.Z, T28.X,
+; EG-NEXT:     MOV T35.X, T24.X,
+; EG-NEXT:     MOV * T35.Z, T20.X, BS:VEC_120/SCL_212
 ;
 ; GFX12-LABEL: constant_zextload_v32i8_to_v32i16:
 ; GFX12:       ; %bb.0:
@@ -11642,60 +12281,331 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(ptr addrspace(1) %o
 ;
 ; EG-LABEL: constant_sextload_v32i8_to_v32i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 1 @8
-; EG-NEXT:    ALU 39, @13, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1
+; EG-NEXT:    ALU 1, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 1 @10
+; EG-NEXT:    ALU 104, @16, KC0[], KC1[]
+; EG-NEXT:    ALU 104, @121, KC0[], KC1[]
+; EG-NEXT:    ALU 95, @226, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T42.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 1
 ; EG-NEXT:    CF_END
-; EG-NEXT:    Fetch clause starting at 8:
-; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
-; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
-; EG-NEXT:    ALU clause starting at 12:
-; EG-NEXT:     MOV * T11.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 13:
-; EG-NEXT:     LSHR T13.X, KC0[2].Y, literal.x,
-; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
-; EG-NEXT:     LSHR T14.X, PV.W, literal.x,
-; EG-NEXT:     BFE_INT * T15.Z, T11.Y, 0.0, literal.y,
-; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
-; EG-NEXT:     BFE_INT T15.X, T11.X, 0.0, literal.x,
-; EG-NEXT:     LSHR T0.Y, T12.W, literal.x,
-; EG-NEXT:     BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT:     LSHR T0.W, T12.Y, literal.x,
-; EG-NEXT:     LSHR * T1.W, T11.Y, literal.x,
+; EG-NEXT:    Fetch clause starting at 10:
+; EG-NEXT:     VTX_READ_128 T37.XYZW, T35.X, 16, #1
+; EG-NEXT:     VTX_READ_128 T35.XYZW, T35.X, 0, #1
+; EG-NEXT:    ALU clause starting at 14:
+; EG-NEXT:     MOV * T0.Y, T16.X,
+; EG-NEXT:     MOV * T35.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 16:
+; EG-NEXT:     BFE_INT * T0.W, T37.X, 0.0, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_INT T16.X, T11.Z, 0.0, literal.x,
-; EG-NEXT:     LSHR T1.Y, T11.W, literal.x,
-; EG-NEXT:     BFE_INT T17.Z, T12.Y, 0.0, literal.x,
-; EG-NEXT:     BFE_INT T15.W, PS, 0.0, literal.x,
-; EG-NEXT:     LSHR * T1.W, T11.X, literal.x,
+; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), -65536(nan)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T16.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T37.X, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_INT T17.X, T12.X, 0.0, literal.x,
-; EG-NEXT:     BFE_INT T15.Y, PS, 0.0, literal.x,
-; EG-NEXT:     BFE_INT T18.Z, T12.W, 0.0, literal.x,
-; EG-NEXT:     BFE_INT T16.W, PV.Y, 0.0, literal.x,
-; EG-NEXT:     LSHR * T1.W, T11.Z, literal.x,
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T16.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T17.X,
+; EG-NEXT:     LSHR * T0.W, T37.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T17.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T37.X, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T36.Y, PV.W, PS,
+; EG-NEXT:     MOV T17.X, PV.Y,
+; EG-NEXT:     MOV T0.Y, T12.X,
+; EG-NEXT:     BFE_INT * T0.W, T37.Y, 0.0, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_INT T18.X, T12.Z, 0.0, literal.x,
-; EG-NEXT:     BFE_INT T16.Y, PS, 0.0, literal.x,
-; EG-NEXT:     LSHR T0.Z, T12.X, literal.x,
-; EG-NEXT:     BFE_INT T17.W, T0.W, 0.0, literal.x,
-; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT:    8(1.121039e-44), 32(4.484155e-44)
-; EG-NEXT:     LSHR T11.X, PS, literal.x,
-; EG-NEXT:     BFE_INT T17.Y, PV.Z, 0.0, literal.y,
-; EG-NEXT:     LSHR T0.Z, T12.Z, literal.y,
-; EG-NEXT:     BFE_INT T18.W, T0.Y, 0.0, literal.y,
-; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
-; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
-; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHR T12.X, PS, literal.x,
-; EG-NEXT:     BFE_INT * T18.Y, PV.Z, 0.0, literal.y,
-; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T12.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T37.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T12.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T13.X,
+; EG-NEXT:     LSHR * T0.W, T37.Y, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T13.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T37.Y, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T36.W, PV.W, PS,
+; EG-NEXT:     MOV T13.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T8.X,
+; EG-NEXT:     BFE_INT * T0.W, T37.Z, 0.0, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T37.Z, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T9.X,
+; EG-NEXT:     LSHR * T0.W, T37.Z, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T9.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T37.Z, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:    ALU clause starting at 121:
+; EG-NEXT:     OR_INT * T37.Y, T1.W, T0.W,
+; EG-NEXT:     MOV T9.X, PV.Y,
+; EG-NEXT:     MOV T0.Y, T4.X,
+; EG-NEXT:     BFE_INT * T0.W, T37.W, 0.0, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T37.W, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T5.X,
+; EG-NEXT:     LSHR * T0.W, T37.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T37.W, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T37.W, PV.W, PS,
+; EG-NEXT:     MOV T5.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T32.X,
+; EG-NEXT:     BFE_INT * T0.W, T35.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T32.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T35.X, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T32.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T33.X,
+; EG-NEXT:     LSHR * T0.W, T35.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T33.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T35.X, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T38.Y, PV.W, PS,
+; EG-NEXT:     MOV T33.X, PV.Y,
+; EG-NEXT:     MOV T0.Y, T28.X,
+; EG-NEXT:     BFE_INT * T0.W, T35.Y, 0.0, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T28.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T35.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T28.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T29.X,
+; EG-NEXT:     LSHR * T0.W, T35.Y, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T29.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T35.Y, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:    ALU clause starting at 226:
+; EG-NEXT:     AND_INT T1.W, T0.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, T0.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T38.W, PV.W, PS,
+; EG-NEXT:     MOV T29.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T24.X,
+; EG-NEXT:     BFE_INT * T0.W, T35.Z, 0.0, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T24.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T35.Z, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T24.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T25.X,
+; EG-NEXT:     LSHR * T0.W, T35.Z, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T25.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T35.Z, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T35.Y, PV.W, PS,
+; EG-NEXT:     MOV T25.X, PV.Y,
+; EG-NEXT:     MOV T0.Y, T20.X,
+; EG-NEXT:     BFE_INT * T0.W, T35.W, 0.0, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T20.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T35.W, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T20.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T21.X,
+; EG-NEXT:     LSHR * T0.W, T35.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T21.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
+; EG-NEXT:     LSHR * T40.X, KC0[2].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     ASHR T0.W, T35.W, literal.x,
+; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT:    24(3.363116e-44), 48(6.726233e-44)
+; EG-NEXT:     LSHR T41.X, PS, literal.x,
+; EG-NEXT:     AND_INT T0.Z, T0.Y, literal.y,
+; EG-NEXT:     LSHL T0.W, PV.W, literal.z,
+; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.w,
+; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT:    16(2.242078e-44), 32(4.484155e-44)
+; EG-NEXT:     LSHR T42.X, PS, literal.x,
+; EG-NEXT:     OR_INT * T35.W, PV.Z, PV.W,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T21.X, PV.W,
+; EG-NEXT:     MOV * T36.X, T16.X,
+; EG-NEXT:     MOV * T36.Z, T12.X,
+; EG-NEXT:     MOV T37.X, T8.X,
+; EG-NEXT:     MOV T37.Z, T4.X, BS:VEC_120/SCL_212
+; EG-NEXT:     MOV * T38.X, T32.X,
+; EG-NEXT:     MOV * T38.Z, T28.X,
+; EG-NEXT:     MOV T35.X, T24.X,
+; EG-NEXT:     MOV * T35.Z, T20.X, BS:VEC_120/SCL_212
 ;
 ; GFX12-LABEL: constant_sextload_v32i8_to_v32i16:
 ; GFX12:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
index 3753737d251e4..ff5b9aadc87fb 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
@@ -263,63 +263,74 @@ define amdgpu_kernel void @global_load_v3i16(ptr addrspace(1) %out, ptr addrspac
 ;
 ; EG-LABEL: global_load_v3i16:
 ; EG:       ; %bb.0: ; %entry
-; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 1 @6
-; EG-NEXT:    ALU 14, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T3.X, 0
-; EG-NEXT:    MEM_RAT MSKOR T2.XW, T0.X
+; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 2 @6
+; EG-NEXT:    ALU 19, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
+; EG-NEXT:    MEM_RAT MSKOR T5.XW, T8.X
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_16 T1.X, T0.X, 0, #1
-; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 4, #1
-; EG-NEXT:    ALU clause starting at 10:
-; EG-NEXT:     MOV * T0.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 11:
+; EG-NEXT:     VTX_READ_16 T6.X, T5.X, 0, #1
+; EG-NEXT:     VTX_READ_16 T7.X, T5.X, 2, #1
+; EG-NEXT:     VTX_READ_16 T5.X, T5.X, 4, #1
+; EG-NEXT:    ALU clause starting at 12:
+; EG-NEXT:     MOV * T5.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 13:
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
 ; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
 ; EG-NEXT:     AND_INT T1.W, PV.W, literal.x,
-; EG-NEXT:     AND_INT * T2.W, T0.X, literal.y,
+; EG-NEXT:     AND_INT * T2.W, T5.X, literal.y,
 ; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
 ; EG-NEXT:     LSHL * T1.W, PV.W, literal.x,
 ; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
-; EG-NEXT:     LSHL T2.X, T2.W, PV.W,
-; EG-NEXT:     LSHL * T2.W, literal.x, PV.W,
+; EG-NEXT:     LSHL T5.X, T2.W, PV.W,
+; EG-NEXT:     LSHL * T5.W, literal.x, PV.W,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     MOV T5.Y, 0.0,
+; EG-NEXT:     MOV * T5.Z, 0.0,
+; EG-NEXT:     LSHR T8.X, T0.W, literal.x,
+; EG-NEXT:     LSHL T0.W, T7.X, literal.y,
+; EG-NEXT:     AND_INT * T1.W, T6.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
 ; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT:     MOV T2.Y, 0.0,
-; EG-NEXT:     MOV * T2.Z, 0.0,
-; EG-NEXT:     LSHR T0.X, T0.W, literal.x,
-; EG-NEXT:     LSHR * T3.X, KC0[2].Y, literal.x,
+; EG-NEXT:     OR_INT T6.X, PV.W, PS,
+; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 ;
 ; CM-LABEL: global_load_v3i16:
 ; CM:       ; %bb.0: ; %entry
-; CM-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    TEX 1 @6
-; CM-NEXT:    ALU 15, @11, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    MEM_RAT MSKOR T2.XW, T3.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CM-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    TEX 2 @6
+; CM-NEXT:    ALU 19, @13, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT MSKOR T5.XW, T8.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T6.X, T7.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    Fetch clause starting at 6:
-; CM-NEXT:     VTX_READ_16 T1.X, T0.X, 0, #1
-; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 4, #1
-; CM-NEXT:    ALU clause starting at 10:
-; CM-NEXT:     MOV * T0.X, KC0[2].Z,
-; CM-NEXT:    ALU clause starting at 11:
+; CM-NEXT:     VTX_READ_16 T6.X, T5.X, 0, #1
+; CM-NEXT:     VTX_READ_16 T7.X, T5.X, 2, #1
+; CM-NEXT:     VTX_READ_16 T5.X, T5.X, 4, #1
+; CM-NEXT:    ALU clause starting at 12:
+; CM-NEXT:     MOV * T5.X, KC0[2].Z,
+; CM-NEXT:    ALU clause starting at 13:
 ; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
 ; CM-NEXT:    4(5.605194e-45), 0(0.000000e+00)
 ; CM-NEXT:     AND_INT * T1.W, PV.W, literal.x,
 ; CM-NEXT:    3(4.203895e-45), 0(0.000000e+00)
-; CM-NEXT:     AND_INT T0.Z, T0.X, literal.x,
+; CM-NEXT:     AND_INT T0.Z, T5.X, literal.x,
 ; CM-NEXT:     LSHL * T1.W, PV.W, literal.y,
 ; CM-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
-; CM-NEXT:     LSHL T2.X, PV.Z, PV.W,
-; CM-NEXT:     LSHL * T2.W, literal.x, PV.W,
+; CM-NEXT:     LSHL T5.X, PV.Z, PV.W,
+; CM-NEXT:     LSHL * T5.W, literal.x, PV.W,
 ; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
-; CM-NEXT:     MOV T2.Y, 0.0,
-; CM-NEXT:     MOV * T2.Z, 0.0,
-; CM-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
+; CM-NEXT:     MOV T5.Y, 0.0,
+; CM-NEXT:     MOV * T5.Z, 0.0,
+; CM-NEXT:     LSHL T0.Z, T7.X, literal.x,
+; CM-NEXT:     AND_INT * T1.W, T6.X, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T6.X, PV.Z, PV.W,
+; CM-NEXT:     LSHR * T7.X, KC0[2].Y, literal.x,
 ; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT:     LSHR * T3.X, T0.W, literal.x,
+; CM-NEXT:     LSHR * T8.X, T0.W, literal.x,
 ; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 entry:
   %ld = load <3 x i16>, ptr addrspace(1) %in
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i8.ll b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
index 5bc02c4d63181..6a39df95f6aba 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
@@ -9887,46 +9887,97 @@ define amdgpu_kernel void @global_zextload_v4i8_to_v4i16(ptr addrspace(1) %out,
 ;
 ; EG-LABEL: global_zextload_v4i8_to_v4i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 6, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1
+; EG-NEXT:    ALU 31, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XY, T7.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
+; EG-NEXT:     VTX_READ_32 T7.X, T7.X, 0, #1
 ; EG-NEXT:    ALU clause starting at 8:
-; EG-NEXT:     MOV * T4.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     MOV * T0.Y, T4.X,
+; EG-NEXT:     MOV * T7.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     AND_INT T0.W, T7.X, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    255(3.573311e-43), -65536(nan)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T0.W, T7.X, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T5.X,
 ; EG-NEXT:     MOV * T0.W, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T4.Y, T4.X, literal.x, PV.W,
+; EG-NEXT:     BFE_UINT T0.W, T7.X, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    16(2.242078e-44), -65536(nan)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T7.X, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T4.X, T4.X, literal.x,
-; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
-; EG-NEXT:    255(3.573311e-43), 2(2.802597e-45)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
+; EG-NEXT:     OR_INT * T8.Y, PV.W, PS,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T5.X, PV.Y,
+; EG-NEXT:     MOV * T8.X, T4.X,
 ;
 ; CM-LABEL: global_zextload_v4i8_to_v4i16:
 ; CM:       ; %bb.0:
-; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    TEX 0 @6
-; CM-NEXT:    ALU 7, @9, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T4, T5.X
+; CM-NEXT:    ALU 31, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T8, T7.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    PAD
 ; CM-NEXT:    Fetch clause starting at 6:
-; CM-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
+; CM-NEXT:     VTX_READ_32 T7.X, T7.X, 0, #1
 ; CM-NEXT:    ALU clause starting at 8:
-; CM-NEXT:     MOV * T4.X, KC0[2].Z,
-; CM-NEXT:    ALU clause starting at 9:
+; CM-NEXT:     MOV * T0.Y, T4.X,
+; CM-NEXT:     MOV * T7.X, KC0[2].Z,
+; CM-NEXT:    ALU clause starting at 10:
+; CM-NEXT:     AND_INT T0.Z, T7.X, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT:    255(3.573311e-43), -65536(nan)
+; CM-NEXT:     OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT:     MOV * T4.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHL * T0.W, T7.X, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T4.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T5.X,
 ; CM-NEXT:     MOV * T0.W, literal.x,
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_UINT * T4.Y, T4.X, literal.x, PV.W,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     BFE_UINT * T0.W, T7.X, literal.y, PV.W,
+; CM-NEXT:    -65536(nan), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T5.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T0.W, T7.X, literal.x,
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     AND_INT * T4.X, T4.X, literal.x,
-; CM-NEXT:    255(3.573311e-43), 0(0.000000e+00)
-; CM-NEXT:     LSHR * T5.X, KC0[2].Y, literal.x,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
+; CM-NEXT:     OR_INT * T8.Y, PV.Z, PV.W,
 ; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT:     MOV * T5.X, PV.Y,
+; CM-NEXT:     MOV * T8.X, T4.X,
   %load = load <4 x i8>, ptr addrspace(1) %in
   %ext = zext <4 x i8> %load to <4 x i16>
   store <4 x i16> %ext, ptr addrspace(1) %out
@@ -10017,43 +10068,109 @@ define amdgpu_kernel void @global_sextload_v4i8_to_v4i16(ptr addrspace(1) %out,
 ;
 ; EG-LABEL: global_sextload_v4i8_to_v4i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 5, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1
+; EG-NEXT:    ALU 37, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XY, T7.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
+; EG-NEXT:     VTX_READ_32 T7.X, T7.X, 0, #1
 ; EG-NEXT:    ALU clause starting at 8:
-; EG-NEXT:     MOV * T4.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 9:
-; EG-NEXT:     BFE_INT T5.X, T4.X, 0.0, literal.x,
-; EG-NEXT:     LSHR T0.W, T4.X, literal.x,
-; EG-NEXT:     LSHR * T4.X, KC0[2].Y, literal.y,
-; EG-NEXT:    8(1.121039e-44), 2(2.802597e-45)
-; EG-NEXT:     BFE_INT * T5.Y, PV.W, 0.0, literal.x,
+; EG-NEXT:     MOV * T0.Y, T4.X,
+; EG-NEXT:     MOV * T7.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     BFE_INT * T0.W, T7.X, 0.0, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), -65536(nan)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T7.X, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T5.X,
+; EG-NEXT:     LSHR * T0.W, T7.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T7.X, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
+; EG-NEXT:     OR_INT * T8.Y, PV.W, PS,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T5.X, PV.Y,
+; EG-NEXT:     MOV * T8.X, T4.X,
 ;
 ; CM-LABEL: global_sextload_v4i8_to_v4i16:
 ; CM:       ; %bb.0:
-; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    TEX 0 @6
-; CM-NEXT:    ALU 5, @9, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T5, T4.X
+; CM-NEXT:    ALU 37, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T8, T7.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    PAD
 ; CM-NEXT:    Fetch clause starting at 6:
-; CM-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
+; CM-NEXT:     VTX_READ_32 T7.X, T7.X, 0, #1
 ; CM-NEXT:    ALU clause starting at 8:
-; CM-NEXT:     MOV * T4.X, KC0[2].Z,
-; CM-NEXT:    ALU clause starting at 9:
-; CM-NEXT:     BFE_INT T5.X, T4.X, 0.0, literal.x,
-; CM-NEXT:     LSHR * T0.W, T4.X, literal.x,
+; CM-NEXT:     MOV * T0.Y, T4.X,
+; CM-NEXT:     MOV * T7.X, KC0[2].Z,
+; CM-NEXT:    ALU clause starting at 10:
+; CM-NEXT:     BFE_INT * T0.W, T7.X, 0.0, literal.x,
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     LSHR T4.X, KC0[2].Y, literal.x,
-; CM-NEXT:     BFE_INT * T5.Y, PV.W, 0.0, literal.y,
-; CM-NEXT:    2(2.802597e-45), 8(1.121039e-44)
+; CM-NEXT:     AND_INT T0.Z, PV.W, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), -65536(nan)
+; CM-NEXT:     OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT:     MOV * T4.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T0.W, T7.X, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T4.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T5.X,
+; CM-NEXT:     LSHR * T0.W, T7.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T5.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     ASHR * T0.W, T7.X, literal.x,
+; CM-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
+; CM-NEXT:     OR_INT * T8.Y, PV.Z, PV.W,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT:     MOV * T5.X, PV.Y,
+; CM-NEXT:     MOV * T8.X, T4.X,
   %load = load <4 x i8>, ptr addrspace(1) %in
   %ext = sext <4 x i8> %load to <4 x i16>
   store <4 x i16> %ext, ptr addrspace(1) %out
@@ -10158,52 +10275,156 @@ define amdgpu_kernel void @global_zextload_v8i8_to_v8i16(ptr addrspace(1) %out,
 ;
 ; EG-LABEL: global_zextload_v8i8_to_v8i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 9, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1
+; EG-NEXT:    ALU 61, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T11.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
+; EG-NEXT:     VTX_READ_64 T11.XY, T11.X, 0, #1
 ; EG-NEXT:    ALU clause starting at 8:
-; EG-NEXT:     MOV * T5.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     MOV * T0.Y, T8.X,
+; EG-NEXT:     MOV * T11.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     AND_INT T0.W, T11.X, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    255(3.573311e-43), -65536(nan)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T0.W, T11.X, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T9.X,
 ; EG-NEXT:     MOV * T0.W, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T6.W, T5.Y, literal.x, PV.W,
+; EG-NEXT:     BFE_UINT T1.W, T11.X, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.y,
+; EG-NEXT:    16(2.242078e-44), -65536(nan)
+; EG-NEXT:     OR_INT * T1.W, PS, PV.W,
+; EG-NEXT:     MOV * T9.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T11.X, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T6.Y, T5.X, literal.x, T0.W,
-; EG-NEXT:     AND_INT * T6.Z, T5.Y, literal.y,
-; EG-NEXT:    8(1.121039e-44), 255(3.573311e-43)
-; EG-NEXT:     AND_INT T6.X, T5.X, literal.x,
-; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
-; EG-NEXT:    255(3.573311e-43), 2(2.802597e-45)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T12.Y, PV.W, PS,
+; EG-NEXT:     MOV T9.X, PV.Y,
+; EG-NEXT:     MOV * T0.Y, T4.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T11.Y, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T11.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T5.X,
+; EG-NEXT:     BFE_UINT * T0.W, T11.Y, literal.x, T0.W,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, PV.W, T0.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     LSHR T11.X, KC0[2].Y, literal.x,
+; EG-NEXT:     OR_INT * T12.W, PV.W, PS,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T5.X, PV.W,
+; EG-NEXT:     MOV * T12.X, T8.X,
+; EG-NEXT:     MOV * T12.Z, T4.X,
 ;
 ; CM-LABEL: global_zextload_v8i8_to_v8i16:
 ; CM:       ; %bb.0:
-; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    TEX 0 @6
-; CM-NEXT:    ALU 10, @9, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T6, T5.X
+; CM-NEXT:    ALU 60, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T12, T11.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    PAD
 ; CM-NEXT:    Fetch clause starting at 6:
-; CM-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
+; CM-NEXT:     VTX_READ_64 T11.XY, T11.X, 0, #1
 ; CM-NEXT:    ALU clause starting at 8:
-; CM-NEXT:     MOV * T5.X, KC0[2].Z,
-; CM-NEXT:    ALU clause starting at 9:
+; CM-NEXT:     MOV * T0.Y, T8.X,
+; CM-NEXT:     MOV * T11.X, KC0[2].Z,
+; CM-NEXT:    ALU clause starting at 10:
+; CM-NEXT:     AND_INT T0.Z, T11.X, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT:    255(3.573311e-43), -65536(nan)
+; CM-NEXT:     OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT:     MOV * T8.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHL * T0.W, T11.X, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T8.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T9.X,
 ; CM-NEXT:     MOV * T0.W, literal.x,
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_UINT * T6.W, T5.Y, literal.x, PV.W,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     BFE_UINT * T1.W, T11.X, literal.y, PV.W,
+; CM-NEXT:    -65536(nan), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T9.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T1.W, T11.X, literal.x,
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_UINT T6.Y, T5.X, literal.x, T0.W,
-; CM-NEXT:     AND_INT * T6.Z, T5.Y, literal.y,
-; CM-NEXT:    8(1.121039e-44), 255(3.573311e-43)
-; CM-NEXT:     AND_INT * T6.X, T5.X, literal.x,
-; CM-NEXT:    255(3.573311e-43), 0(0.000000e+00)
-; CM-NEXT:     LSHR * T5.X, KC0[2].Y, literal.x,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T12.Y, PV.Z, PV.W,
+; CM-NEXT:     MOV T9.X, PV.Y,
+; CM-NEXT:     MOV * T0.Y, T4.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, T11.Y, literal.y,
+; CM-NEXT:    -65536(nan), 255(3.573311e-43)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T4.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHL * T1.W, T11.Y, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T4.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T5.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     BFE_UINT * T0.W, T11.Y, literal.y, T0.W,
+; CM-NEXT:    -65536(nan), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T5.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T0.W, T11.Y, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     LSHR T11.X, KC0[2].Y, literal.x,
+; CM-NEXT:     OR_INT * T12.W, PV.Z, PV.W,
 ; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT:     MOV * T5.X, PV.W,
+; CM-NEXT:     MOV T12.X, T8.X,
+; CM-NEXT:     MOV * T12.Z, T4.X, BS:VEC_120/SCL_212
   %load = load <8 x i8>, ptr addrspace(1) %in
   %ext = zext <8 x i8> %load to <8 x i16>
   store <8 x i16> %ext, ptr addrspace(1) %out
@@ -10344,53 +10565,183 @@ define amdgpu_kernel void @global_sextload_v8i8_to_v8i16(ptr addrspace(1) %out,
 ;
 ; EG-LABEL: global_sextload_v8i8_to_v8i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 10, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1
+; EG-NEXT:    ALU 74, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T11.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
+; EG-NEXT:     VTX_READ_64 T11.XY, T11.X, 0, #1
 ; EG-NEXT:    ALU clause starting at 8:
-; EG-NEXT:     MOV * T5.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 9:
-; EG-NEXT:     BFE_INT * T6.Z, T5.Y, 0.0, literal.x,
+; EG-NEXT:     MOV * T0.Y, T8.X,
+; EG-NEXT:     MOV * T11.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     BFE_INT * T0.W, T11.X, 0.0, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_INT T6.X, T5.X, 0.0, literal.x,
-; EG-NEXT:     LSHR * T0.W, T5.Y, literal.x,
+; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), -65536(nan)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T11.X, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_INT T6.W, PV.W, 0.0, literal.x,
-; EG-NEXT:     LSHR * T0.W, T5.X, literal.x,
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T9.X,
+; EG-NEXT:     LSHR * T0.W, T11.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T9.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T11.X, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T12.Y, PV.W, PS,
+; EG-NEXT:     MOV T9.X, PV.Y,
+; EG-NEXT:     MOV T0.Y, T4.X,
+; EG-NEXT:     BFE_INT * T0.W, T11.Y, 0.0, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHR T5.X, KC0[2].Y, literal.x,
-; EG-NEXT:     BFE_INT * T6.Y, PS, 0.0, literal.y,
-; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T5.X,
+; EG-NEXT:     LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T11.Y, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     LSHR T11.X, KC0[2].Y, literal.x,
+; EG-NEXT:     OR_INT * T12.W, PV.W, PS,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T5.X, PV.W,
+; EG-NEXT:     MOV * T12.X, T8.X,
+; EG-NEXT:     MOV * T12.Z, T4.X,
 ;
 ; CM-LABEL: global_sextload_v8i8_to_v8i16:
 ; CM:       ; %bb.0:
-; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    TEX 0 @6
-; CM-NEXT:    ALU 10, @9, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T6, T5.X
+; CM-NEXT:    ALU 74, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T12, T11.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    PAD
 ; CM-NEXT:    Fetch clause starting at 6:
-; CM-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
+; CM-NEXT:     VTX_READ_64 T11.XY, T11.X, 0, #1
 ; CM-NEXT:    ALU clause starting at 8:
-; CM-NEXT:     MOV * T5.X, KC0[2].Z,
-; CM-NEXT:    ALU clause starting at 9:
-; CM-NEXT:     BFE_INT * T6.Z, T5.Y, 0.0, literal.x,
+; CM-NEXT:     MOV * T0.Y, T8.X,
+; CM-NEXT:     MOV * T11.X, KC0[2].Z,
+; CM-NEXT:    ALU clause starting at 10:
+; CM-NEXT:     BFE_INT * T0.W, T11.X, 0.0, literal.x,
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_INT T6.X, T5.X, 0.0, literal.x,
-; CM-NEXT:     LSHR * T0.W, T5.Y, literal.x,
+; CM-NEXT:     AND_INT T0.Z, PV.W, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), -65536(nan)
+; CM-NEXT:     OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT:     MOV * T8.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T0.W, T11.X, literal.x,
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     LSHR T0.Z, T5.X, literal.x,
-; CM-NEXT:     BFE_INT * T6.W, PV.W, 0.0, literal.x,
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     LSHR T5.X, KC0[2].Y, literal.x,
-; CM-NEXT:     BFE_INT * T6.Y, PV.Z, 0.0, literal.y,
-; CM-NEXT:    2(2.802597e-45), 8(1.121039e-44)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T8.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T9.X,
+; CM-NEXT:     LSHR * T0.W, T11.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T9.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     ASHR * T0.W, T11.X, literal.x,
+; CM-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T12.Y, PV.Z, PV.W,
+; CM-NEXT:     MOV T9.X, PV.Y,
+; CM-NEXT:     MOV T0.Y, T4.X,
+; CM-NEXT:     BFE_INT * T0.W, T11.Y, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T4.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T0.W, T11.Y, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T4.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T5.X,
+; CM-NEXT:     LSHR * T0.W, T11.Y, literal.x,
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T5.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     ASHR * T0.W, T11.Y, literal.x,
+; CM-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     LSHR T11.X, KC0[2].Y, literal.x,
+; CM-NEXT:     OR_INT * T12.W, PV.Z, PV.W,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT:     MOV * T5.X, PV.W,
+; CM-NEXT:     MOV T12.X, T8.X,
+; CM-NEXT:     MOV * T12.Z, T4.X, BS:VEC_120/SCL_212
   %load = load <8 x i8>, ptr addrspace(1) %in
   %ext = sext <8 x i8> %load to <8 x i16>
   store <8 x i16> %ext, ptr addrspace(1) %out
@@ -10547,71 +10898,287 @@ define amdgpu_kernel void @global_zextload_v16i8_to_v16i16(ptr addrspace(1) %out
 ;
 ; EG-LABEL: global_zextload_v16i8_to_v16i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 19, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
+; EG-NEXT:    ALU 1, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @8
+; EG-NEXT:    ALU 103, @12, KC0[], KC1[]
+; EG-NEXT:    ALU 20, @116, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T22.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T21.X, 1
 ; EG-NEXT:    CF_END
-; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
-; EG-NEXT:    ALU clause starting at 8:
-; EG-NEXT:     MOV * T7.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 8:
+; EG-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     MOV * T0.Y, T16.X,
+; EG-NEXT:     MOV * T19.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 12:
+; EG-NEXT:     AND_INT T0.W, T19.X, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    255(3.573311e-43), -65536(nan)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T16.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T0.W, T19.X, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T16.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T17.X,
 ; EG-NEXT:     MOV * T0.W, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T8.W, T7.Y, literal.x, PV.W,
+; EG-NEXT:     BFE_UINT T1.W, T19.X, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.y,
+; EG-NEXT:    16(2.242078e-44), -65536(nan)
+; EG-NEXT:     OR_INT * T1.W, PS, PV.W,
+; EG-NEXT:     MOV * T17.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T19.X, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T8.Y, T7.X, literal.x, T0.W,
-; EG-NEXT:     AND_INT T8.Z, T7.Y, literal.y,
-; EG-NEXT:     BFE_UINT * T9.W, T7.W, literal.x, T0.W,
-; EG-NEXT:    8(1.121039e-44), 255(3.573311e-43)
-; EG-NEXT:     AND_INT T8.X, T7.X, literal.x,
-; EG-NEXT:     BFE_UINT T9.Y, T7.Z, literal.y, T0.W,
-; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.z,
-; EG-NEXT:    255(3.573311e-43), 8(1.121039e-44)
-; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT:     AND_INT * T9.Z, T7.W, literal.x,
-; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T9.X, T7.Z, literal.x,
-; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT:    255(3.573311e-43), 16(2.242078e-44)
-; EG-NEXT:     LSHR * T10.X, PV.W, literal.x,
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T20.Y, PV.W, PS,
+; EG-NEXT:     MOV T17.X, PV.Y,
+; EG-NEXT:     MOV * T0.Y, T12.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T19.Y, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T12.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T19.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T12.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T13.X,
+; EG-NEXT:     BFE_UINT * T1.W, T19.Y, literal.x, T0.W,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT:     MOV * T13.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T19.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T20.W, PV.W, PS,
+; EG-NEXT:     MOV T13.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T8.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T19.Z, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T19.Z, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T9.X,
+; EG-NEXT:     BFE_UINT * T1.W, T19.Z, literal.x, T0.W,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT:     MOV * T9.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T19.Z, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T19.Y, PV.W, PS,
+; EG-NEXT:     MOV T9.X, PV.Y,
+; EG-NEXT:     MOV * T0.Y, T4.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T19.W, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T19.W, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T5.X,
+; EG-NEXT:     BFE_UINT * T0.W, T19.W, literal.x, T0.W,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:    ALU clause starting at 116:
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, PV.W, T0.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR T0.W, T19.W, literal.x,
+; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 16(2.242078e-44)
+; EG-NEXT:     LSHR T21.X, PS, literal.x,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.y,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.z,
+; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT:    16711680(2.341805e-38), 0(0.000000e+00)
+; EG-NEXT:     LSHR T22.X, KC0[2].Y, literal.x,
+; EG-NEXT:     OR_INT * T19.W, PV.W, PS,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T5.X, PV.W,
+; EG-NEXT:     MOV * T20.X, T16.X,
+; EG-NEXT:     MOV * T20.Z, T12.X,
+; EG-NEXT:     MOV T19.X, T8.X,
+; EG-NEXT:     MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
 ;
 ; CM-LABEL: global_zextload_v16i8_to_v16i16:
 ; CM:       ; %bb.0:
-; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    TEX 0 @6
-; CM-NEXT:    ALU 19, @9, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T9, T7.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T8, T10.X
+; CM-NEXT:    ALU 1, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    TEX 0 @8
+; CM-NEXT:    ALU 101, @12, KC0[], KC1[]
+; CM-NEXT:    ALU 20, @114, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T19, T22.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T20, T21.X
 ; CM-NEXT:    CF_END
-; CM-NEXT:    Fetch clause starting at 6:
-; CM-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
-; CM-NEXT:    ALU clause starting at 8:
-; CM-NEXT:     MOV * T7.X, KC0[2].Z,
-; CM-NEXT:    ALU clause starting at 9:
+; CM-NEXT:    PAD
+; CM-NEXT:    Fetch clause starting at 8:
+; CM-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; CM-NEXT:    ALU clause starting at 10:
+; CM-NEXT:     MOV * T0.Y, T16.X,
+; CM-NEXT:     MOV * T19.X, KC0[2].Z,
+; CM-NEXT:    ALU clause starting at 12:
+; CM-NEXT:     AND_INT T0.Z, T19.X, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT:    255(3.573311e-43), -65536(nan)
+; CM-NEXT:     OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT:     MOV * T16.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHL * T0.W, T19.X, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T16.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T17.X,
 ; CM-NEXT:     MOV * T0.W, literal.x,
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_UINT * T8.W, T7.W, literal.x, PV.W,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     BFE_UINT * T1.W, T19.X, literal.y, PV.W,
+; CM-NEXT:    -65536(nan), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T17.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T1.W, T19.X, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T20.Y, PV.Z, PV.W,
+; CM-NEXT:     MOV T17.X, PV.Y,
+; CM-NEXT:     MOV * T0.Y, T12.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, T19.Y, literal.y,
+; CM-NEXT:    -65536(nan), 255(3.573311e-43)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T12.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHL * T1.W, T19.Y, literal.x,
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_UINT T8.Y, T7.Z, literal.x, T0.W,
-; CM-NEXT:     AND_INT T8.Z, T7.W, literal.y,
-; CM-NEXT:     BFE_UINT * T9.W, T7.Y, literal.x, T0.W,
-; CM-NEXT:    8(1.121039e-44), 255(3.573311e-43)
-; CM-NEXT:     AND_INT T8.X, T7.Z, literal.x,
-; CM-NEXT:     BFE_UINT T9.Y, T7.X, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
-; CM-NEXT:    255(3.573311e-43), 8(1.121039e-44)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T12.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T13.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     BFE_UINT * T1.W, T19.Y, literal.y, T0.W,
+; CM-NEXT:    -65536(nan), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T13.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T1.W, T19.Y, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T20.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T13.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T8.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, T19.Z, literal.y,
+; CM-NEXT:    -65536(nan), 255(3.573311e-43)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T8.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHL * T1.W, T19.Z, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T8.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T9.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     BFE_UINT * T1.W, T19.Z, literal.y, T0.W,
+; CM-NEXT:    -65536(nan), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T9.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T1.W, T19.Z, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T19.Y, PV.Z, PV.W,
+; CM-NEXT:     MOV T9.X, PV.Y,
+; CM-NEXT:     MOV * T0.Y, T4.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, T19.W, literal.y,
+; CM-NEXT:    -65536(nan), 255(3.573311e-43)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T4.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHL * T1.W, T19.W, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T4.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T5.X,
+; CM-NEXT:     AND_INT * T0.Z, PV.Y, literal.x,
+; CM-NEXT:    -65536(nan), 0(0.000000e+00)
+; CM-NEXT:    ALU clause starting at 114:
+; CM-NEXT:     BFE_UINT * T0.W, T19.W, literal.x, T0.W,
 ; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     LSHR T10.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT * T9.Z, T7.Y, literal.y,
-; CM-NEXT:    2(2.802597e-45), 255(3.573311e-43)
-; CM-NEXT:     AND_INT * T9.X, T7.X, literal.x,
-; CM-NEXT:    255(3.573311e-43), 0(0.000000e+00)
-; CM-NEXT:     LSHR * T7.X, KC0[2].Y, literal.x,
+; CM-NEXT:     OR_INT * T0.W, T0.Z, PV.W,
+; CM-NEXT:     MOV * T5.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T0.W, T19.W, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     LSHR T21.X, KC0[2].Y, literal.x,
+; CM-NEXT:     AND_INT T0.Y, PV.Y, literal.y,
+; CM-NEXT:     AND_INT T0.Z, PV.W, literal.z,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.w,
+; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; CM-NEXT:    16711680(2.341805e-38), 16(2.242078e-44)
+; CM-NEXT:     LSHR T22.X, PV.W, literal.x,
+; CM-NEXT:     OR_INT * T19.W, PV.Y, PV.Z,
 ; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT:     MOV * T5.X, PV.W,
+; CM-NEXT:     MOV T20.X, T16.X,
+; CM-NEXT:     MOV * T20.Z, T12.X, BS:VEC_120/SCL_212
+; CM-NEXT:     MOV T19.X, T8.X,
+; CM-NEXT:     MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
   %load = load <16 x i8>, ptr addrspace(1) %in
   %ext = zext <16 x i8> %load to <16 x i16>
   store <16 x i16> %ext, ptr addrspace(1) %out
@@ -10844,72 +11411,343 @@ define amdgpu_kernel void @global_sextload_v16i8_to_v16i16(ptr addrspace(1) %out
 ;
 ; EG-LABEL: global_sextload_v16i8_to_v16i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 20, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
+; EG-NEXT:    ALU 1, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @8
+; EG-NEXT:    ALU 104, @12, KC0[], KC1[]
+; EG-NEXT:    ALU 46, @117, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T22.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T21.X, 1
 ; EG-NEXT:    CF_END
-; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
-; EG-NEXT:    ALU clause starting at 8:
-; EG-NEXT:     MOV * T7.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 9:
-; EG-NEXT:     BFE_INT * T8.Z, T7.Y, 0.0, literal.x,
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 8:
+; EG-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     MOV * T0.Y, T16.X,
+; EG-NEXT:     MOV * T19.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 12:
+; EG-NEXT:     BFE_INT * T0.W, T19.X, 0.0, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_INT T8.X, T7.X, 0.0, literal.x,
-; EG-NEXT:     BFE_INT T9.Z, T7.W, 0.0, literal.x,
-; EG-NEXT:     LSHR * T0.W, T7.Y, literal.x,
+; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), -65536(nan)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T16.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T19.X, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_INT T9.X, T7.Z, 0.0, literal.x,
-; EG-NEXT:     LSHR T0.Z, T7.W, literal.x,
-; EG-NEXT:     BFE_INT T8.W, PV.W, 0.0, literal.x,
-; EG-NEXT:     LSHR * T0.W, T7.X, literal.x,
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T16.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T17.X,
+; EG-NEXT:     LSHR * T0.W, T19.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T17.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T19.X, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T20.Y, PV.W, PS,
+; EG-NEXT:     MOV T17.X, PV.Y,
+; EG-NEXT:     MOV T0.Y, T12.X,
+; EG-NEXT:     BFE_INT * T0.W, T19.Y, 0.0, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
-; EG-NEXT:     BFE_INT T8.Y, PS, 0.0, literal.y,
-; EG-NEXT:     LSHR T1.Z, T7.Z, literal.y,
-; EG-NEXT:     BFE_INT T9.W, PV.Z, 0.0, literal.y,
-; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
-; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T12.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T19.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHR T10.X, PS, literal.x,
-; EG-NEXT:     BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
-; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T12.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T13.X,
+; EG-NEXT:     LSHR * T0.W, T19.Y, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T13.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T19.Y, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T20.W, PV.W, PS,
+; EG-NEXT:     MOV T13.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T8.X,
+; EG-NEXT:     BFE_INT * T0.W, T19.Z, 0.0, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T19.Z, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T9.X,
+; EG-NEXT:     LSHR * T0.W, T19.Z, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T9.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T19.Z, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:    ALU clause starting at 117:
+; EG-NEXT:     OR_INT * T19.Y, T1.W, T0.W,
+; EG-NEXT:     MOV T9.X, PV.Y,
+; EG-NEXT:     MOV T0.Y, T4.X,
+; EG-NEXT:     BFE_INT * T0.W, T19.W, 0.0, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T19.W, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T5.X,
+; EG-NEXT:     LSHR * T0.W, T19.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR T0.W, T19.W, literal.x,
+; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT:    24(3.363116e-44), 16(2.242078e-44)
+; EG-NEXT:     LSHR T21.X, PS, literal.x,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.y,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.z,
+; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHR T22.X, KC0[2].Y, literal.x,
+; EG-NEXT:     OR_INT * T19.W, PV.W, PS,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T5.X, PV.W,
+; EG-NEXT:     MOV * T20.X, T16.X,
+; EG-NEXT:     MOV * T20.Z, T12.X,
+; EG-NEXT:     MOV T19.X, T8.X,
+; EG-NEXT:     MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
 ;
 ; CM-LABEL: global_sextload_v16i8_to_v16i16:
 ; CM:       ; %bb.0:
-; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    TEX 0 @6
-; CM-NEXT:    ALU 19, @9, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T9, T7.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T8, T10.X
+; CM-NEXT:    ALU 1, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    TEX 0 @8
+; CM-NEXT:    ALU 104, @12, KC0[], KC1[]
+; CM-NEXT:    ALU 46, @117, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T19, T22.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T20, T21.X
 ; CM-NEXT:    CF_END
-; CM-NEXT:    Fetch clause starting at 6:
-; CM-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
-; CM-NEXT:    ALU clause starting at 8:
-; CM-NEXT:     MOV * T7.X, KC0[2].Z,
-; CM-NEXT:    ALU clause starting at 9:
-; CM-NEXT:     BFE_INT * T8.Z, T7.W, 0.0, literal.x,
+; CM-NEXT:    PAD
+; CM-NEXT:    Fetch clause starting at 8:
+; CM-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; CM-NEXT:    ALU clause starting at 10:
+; CM-NEXT:     MOV * T0.Y, T16.X,
+; CM-NEXT:     MOV * T19.X, KC0[2].Z,
+; CM-NEXT:    ALU clause starting at 12:
+; CM-NEXT:     BFE_INT * T0.W, T19.X, 0.0, literal.x,
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_INT T8.X, T7.Z, 0.0, literal.x,
-; CM-NEXT:     LSHR T0.Y, T7.Y, literal.x,
-; CM-NEXT:     BFE_INT T9.Z, T7.Y, 0.0, literal.x,
-; CM-NEXT:     LSHR * T0.W, T7.W, literal.x,
+; CM-NEXT:     AND_INT T0.Z, PV.W, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), -65536(nan)
+; CM-NEXT:     OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT:     MOV * T16.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T0.W, T19.X, literal.x,
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_INT T9.X, T7.X, 0.0, literal.x,
-; CM-NEXT:     LSHR T1.Y, T7.Z, literal.x,
-; CM-NEXT:     ADD_INT T0.Z, KC0[2].Y, literal.y,
-; CM-NEXT:     BFE_INT * T8.W, PV.W, 0.0, literal.x,
-; CM-NEXT:    8(1.121039e-44), 16(2.242078e-44)
-; CM-NEXT:     LSHR T10.X, PV.Z, literal.x,
-; CM-NEXT:     BFE_INT T8.Y, PV.Y, 0.0, literal.y,
-; CM-NEXT:     LSHR T0.Z, T7.X, literal.y,
-; CM-NEXT:     BFE_INT * T9.W, T0.Y, 0.0, literal.y,
-; CM-NEXT:    2(2.802597e-45), 8(1.121039e-44)
-; CM-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
-; CM-NEXT:     BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
-; CM-NEXT:    2(2.802597e-45), 8(1.121039e-44)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T16.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T17.X,
+; CM-NEXT:     LSHR * T0.W, T19.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T17.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     ASHR * T0.W, T19.X, literal.x,
+; CM-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T20.Y, PV.Z, PV.W,
+; CM-NEXT:     MOV T17.X, PV.Y,
+; CM-NEXT:     MOV T0.Y, T12.X,
+; CM-NEXT:     BFE_INT * T0.W, T19.Y, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T12.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T0.W, T19.Y, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T12.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T13.X,
+; CM-NEXT:     LSHR * T0.W, T19.Y, literal.x,
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T13.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     ASHR * T0.W, T19.Y, literal.x,
+; CM-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T20.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T13.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T8.X,
+; CM-NEXT:     BFE_INT * T0.W, T19.Z, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T8.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T0.W, T19.Z, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T8.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T9.X,
+; CM-NEXT:     LSHR * T0.W, T19.Z, literal.x,
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T9.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     ASHR * T0.W, T19.Z, literal.x,
+; CM-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:    ALU clause starting at 117:
+; CM-NEXT:     OR_INT * T19.Y, T0.Z, T0.W,
+; CM-NEXT:     MOV T9.X, PV.Y,
+; CM-NEXT:     MOV T0.Y, T4.X,
+; CM-NEXT:     BFE_INT * T0.W, T19.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T4.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T0.W, T19.W, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T4.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T5.X,
+; CM-NEXT:     LSHR * T0.W, T19.W, literal.x,
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T5.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     ASHR * T0.W, T19.W, literal.x,
+; CM-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT:     LSHR T21.X, KC0[2].Y, literal.x,
+; CM-NEXT:     AND_INT T0.Y, PV.Y, literal.y,
+; CM-NEXT:     LSHL T0.Z, PV.W, literal.z,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
+; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     LSHR T22.X, PV.W, literal.x,
+; CM-NEXT:     OR_INT * T19.W, PV.Y, PV.Z,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT:     MOV * T5.X, PV.W,
+; CM-NEXT:     MOV T20.X, T16.X,
+; CM-NEXT:     MOV * T20.Z, T12.X, BS:VEC_120/SCL_212
+; CM-NEXT:     MOV T19.X, T8.X,
+; CM-NEXT:     MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
   %load = load <16 x i8>, ptr addrspace(1) %in
   %ext = sext <16 x i8> %load to <16 x i16>
   store <16 x i16> %ext, ptr addrspace(1) %out
@@ -11181,115 +12019,543 @@ define amdgpu_kernel void @global_zextload_v32i8_to_v32i16(ptr addrspace(1) %out
 ;
 ; EG-LABEL: global_zextload_v32i8_to_v32i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 1 @8
-; EG-NEXT:    ALU 37, @13, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T12.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 1
+; EG-NEXT:    ALU 1, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 1 @10
+; EG-NEXT:    ALU 103, @16, KC0[], KC1[]
+; EG-NEXT:    ALU 104, @120, KC0[], KC1[]
+; EG-NEXT:    ALU 41, @225, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T42.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 1
 ; EG-NEXT:    CF_END
-; EG-NEXT:    Fetch clause starting at 8:
-; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
-; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
-; EG-NEXT:    ALU clause starting at 12:
-; EG-NEXT:     MOV * T11.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 13:
+; EG-NEXT:    Fetch clause starting at 10:
+; EG-NEXT:     VTX_READ_128 T37.XYZW, T35.X, 16, #1
+; EG-NEXT:     VTX_READ_128 T35.XYZW, T35.X, 0, #1
+; EG-NEXT:    ALU clause starting at 14:
+; EG-NEXT:     MOV * T0.Y, T16.X,
+; EG-NEXT:     MOV * T35.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 16:
+; EG-NEXT:     AND_INT T0.W, T37.X, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    255(3.573311e-43), -65536(nan)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T16.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T0.W, T37.X, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T16.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T17.X,
 ; EG-NEXT:     MOV * T0.W, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T13.W, T11.Y, literal.x, PV.W,
+; EG-NEXT:     BFE_UINT T1.W, T37.X, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.y,
+; EG-NEXT:    16(2.242078e-44), -65536(nan)
+; EG-NEXT:     OR_INT * T1.W, PS, PV.W,
+; EG-NEXT:     MOV * T17.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T37.X, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T13.Y, T11.X, literal.x, T0.W,
-; EG-NEXT:     AND_INT T13.Z, T11.Y, literal.y,
-; EG-NEXT:     BFE_UINT * T14.W, T11.W, literal.x, T0.W,
-; EG-NEXT:    8(1.121039e-44), 255(3.573311e-43)
-; EG-NEXT:     AND_INT T13.X, T11.X, literal.x,
-; EG-NEXT:     BFE_UINT T14.Y, T11.Z, literal.y, T0.W,
-; EG-NEXT:     LSHR * T11.X, KC0[2].Y, literal.z,
-; EG-NEXT:    255(3.573311e-43), 8(1.121039e-44)
-; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T14.Z, T11.W, literal.x,
-; EG-NEXT:     BFE_UINT * T15.W, T12.Y, literal.y, T0.W,
-; EG-NEXT:    255(3.573311e-43), 8(1.121039e-44)
-; EG-NEXT:     AND_INT T14.X, T11.Z, literal.x,
-; EG-NEXT:     BFE_UINT T15.Y, T12.X, literal.y, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; EG-NEXT:    255(3.573311e-43), 8(1.121039e-44)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T36.Y, PV.W, PS,
+; EG-NEXT:     MOV T17.X, PV.Y,
+; EG-NEXT:     MOV * T0.Y, T12.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T37.Y, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T12.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T37.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T12.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T13.X,
+; EG-NEXT:     BFE_UINT * T1.W, T37.Y, literal.x, T0.W,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T15.Z, T12.Y, literal.y,
-; EG-NEXT:     BFE_UINT T17.W, T12.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T15.X, T12.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 255(3.573311e-43)
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT:     MOV * T13.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T37.Y, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T17.Y, T12.Z, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT:    8(1.121039e-44), 32(4.484155e-44)
-; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T17.Z, T12.W, literal.y,
-; EG-NEXT:     AND_INT * T17.X, T12.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 255(3.573311e-43)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T36.W, PV.W, PS,
+; EG-NEXT:     MOV T13.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T8.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T37.Z, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T37.Z, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T9.X,
+; EG-NEXT:     BFE_UINT * T1.W, T37.Z, literal.x, T0.W,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT:     MOV * T9.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T37.Z, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T37.Y, PV.W, PS,
+; EG-NEXT:     MOV T9.X, PV.Y,
+; EG-NEXT:     MOV * T0.Y, T4.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T37.W, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T37.W, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T5.X,
+; EG-NEXT:     BFE_UINT * T1.W, T37.W, literal.x, T0.W,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:    ALU clause starting at 120:
+; EG-NEXT:     AND_INT * T2.W, T0.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T37.W, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T37.W, PV.W, PS,
+; EG-NEXT:     MOV T5.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T32.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T35.X, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T32.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T35.X, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T32.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T33.X,
+; EG-NEXT:     BFE_UINT * T1.W, T35.X, literal.x, T0.W, BS:VEC_120/SCL_212
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT:     MOV * T33.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T35.X, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T38.Y, PV.W, PS,
+; EG-NEXT:     MOV T33.X, PV.Y,
+; EG-NEXT:     MOV * T0.Y, T28.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T35.Y, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T28.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T35.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T28.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T29.X,
+; EG-NEXT:     BFE_UINT * T1.W, T35.Y, literal.x, T0.W,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT:     MOV * T29.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T35.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T38.W, PV.W, PS,
+; EG-NEXT:     MOV T29.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T24.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T35.Z, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T24.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHL * T1.W, T35.Z, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T24.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T25.X,
+; EG-NEXT:     BFE_UINT * T1.W, T35.Z, literal.x, T0.W,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT:     MOV * T25.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T1.W, T35.Z, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T35.Y, PV.W, PS,
+; EG-NEXT:     MOV T25.X, PV.Y,
+; EG-NEXT:     MOV * T0.Y, T20.X,
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T35.W, literal.y,
+; EG-NEXT:    -65536(nan), 255(3.573311e-43)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV * T20.X, PV.W,
+; EG-NEXT:    ALU clause starting at 225:
+; EG-NEXT:     MOV T0.Y, T20.X,
+; EG-NEXT:     LSHL * T1.W, T35.W, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     MOV T20.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T21.X,
+; EG-NEXT:     BFE_UINT * T0.W, T35.W, literal.x, T0.W,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, PV.W, T0.W,
+; EG-NEXT:     MOV * T21.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
-; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHR * T18.X, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
+; EG-NEXT:     LSHR * T40.X, KC0[2].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     LSHR T0.W, T35.W, literal.x,
+; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 48(6.726233e-44)
+; EG-NEXT:     LSHR T41.X, PS, literal.x,
+; EG-NEXT:     AND_INT T0.Z, T0.Y, literal.y,
+; EG-NEXT:     AND_INT T0.W, PV.W, literal.z,
+; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.w,
+; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT:    16711680(2.341805e-38), 32(4.484155e-44)
+; EG-NEXT:     LSHR T42.X, PS, literal.x,
+; EG-NEXT:     OR_INT * T35.W, PV.Z, PV.W,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T21.X, PV.W,
+; EG-NEXT:     MOV * T36.X, T16.X,
+; EG-NEXT:     MOV * T36.Z, T12.X,
+; EG-NEXT:     MOV T37.X, T8.X,
+; EG-NEXT:     MOV T37.Z, T4.X, BS:VEC_120/SCL_212
+; EG-NEXT:     MOV * T38.X, T32.X,
+; EG-NEXT:     MOV * T38.Z, T28.X,
+; EG-NEXT:     MOV T35.X, T24.X,
+; EG-NEXT:     MOV * T35.Z, T20.X, BS:VEC_120/SCL_212
 ;
 ; CM-LABEL: global_zextload_v32i8_to_v32i16:
 ; CM:       ; %bb.0:
-; CM-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    TEX 1 @8
-; CM-NEXT:    ALU 39, @13, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T17, T12.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T11, T18.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T14, T16.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T13, T15.X
+; CM-NEXT:    ALU 1, @14, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    TEX 1 @10
+; CM-NEXT:    ALU 101, @16, KC0[], KC1[]
+; CM-NEXT:    ALU 101, @118, KC0[], KC1[]
+; CM-NEXT:    ALU 40, @220, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T36, T42.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T38, T41.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T37, T40.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T35, T39.X
 ; CM-NEXT:    CF_END
-; CM-NEXT:    Fetch clause starting at 8:
-; CM-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 0, #1
-; CM-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 16, #1
-; CM-NEXT:    ALU clause starting at 12:
-; CM-NEXT:     MOV * T11.X, KC0[2].Z,
-; CM-NEXT:    ALU clause starting at 13:
+; CM-NEXT:    Fetch clause starting at 10:
+; CM-NEXT:     VTX_READ_128 T37.XYZW, T35.X, 16, #1
+; CM-NEXT:     VTX_READ_128 T36.XYZW, T35.X, 0, #1
+; CM-NEXT:    ALU clause starting at 14:
+; CM-NEXT:     MOV * T0.Y, T16.X,
+; CM-NEXT:     MOV * T35.X, KC0[2].Z,
+; CM-NEXT:    ALU clause starting at 16:
+; CM-NEXT:     AND_INT T0.Z, T37.X, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT:    255(3.573311e-43), -65536(nan)
+; CM-NEXT:     OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT:     MOV * T16.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHL * T0.W, T37.X, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T16.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T17.X,
 ; CM-NEXT:     MOV * T0.W, literal.x,
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_UINT * T13.W, T11.W, literal.x, PV.W,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     BFE_UINT * T1.W, T37.X, literal.y, PV.W,
+; CM-NEXT:    -65536(nan), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T17.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T1.W, T37.X, literal.x,
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_UINT T13.Y, T11.Z, literal.x, T0.W,
-; CM-NEXT:     AND_INT T13.Z, T11.W, literal.y,
-; CM-NEXT:     BFE_UINT * T14.W, T11.Y, literal.x, T0.W,
-; CM-NEXT:    8(1.121039e-44), 255(3.573311e-43)
-; CM-NEXT:     AND_INT T13.X, T11.Z, literal.x,
-; CM-NEXT:     BFE_UINT T14.Y, T11.X, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    255(3.573311e-43), 8(1.121039e-44)
-; CM-NEXT:    48(6.726233e-44), 0(0.000000e+00)
-; CM-NEXT:     LSHR T15.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T14.Z, T11.Y, literal.y,
-; CM-NEXT:     BFE_UINT * T11.W, T12.W, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 255(3.573311e-43)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T35.Y, PV.Z, PV.W,
+; CM-NEXT:     MOV T17.X, PV.Y,
+; CM-NEXT:     MOV * T0.Y, T12.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, T37.Y, literal.y,
+; CM-NEXT:    -65536(nan), 255(3.573311e-43)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T12.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHL * T1.W, T37.Y, literal.x,
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     AND_INT T14.X, T11.X, literal.x,
-; CM-NEXT:     BFE_UINT T11.Y, T12.Z, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    255(3.573311e-43), 8(1.121039e-44)
-; CM-NEXT:    32(4.484155e-44), 0(0.000000e+00)
-; CM-NEXT:     LSHR T16.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T11.Z, T12.W, literal.y,
-; CM-NEXT:     BFE_UINT * T17.W, T12.Y, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 255(3.573311e-43)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T12.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T13.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     BFE_UINT * T1.W, T37.Y, literal.y, T0.W,
+; CM-NEXT:    -65536(nan), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T13.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T1.W, T37.Y, literal.x,
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     AND_INT T11.X, T12.Z, literal.x,
-; CM-NEXT:     BFE_UINT T17.Y, T12.X, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
-; CM-NEXT:    255(3.573311e-43), 8(1.121039e-44)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T35.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T13.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T8.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, T37.Z, literal.y,
+; CM-NEXT:    -65536(nan), 255(3.573311e-43)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T8.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHL * T1.W, T37.Z, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T8.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T9.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     BFE_UINT * T1.W, T37.Z, literal.y, T0.W,
+; CM-NEXT:    -65536(nan), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T9.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T1.W, T37.Z, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T37.Y, PV.Z, PV.W,
+; CM-NEXT:     MOV T9.X, PV.Y,
+; CM-NEXT:     MOV * T0.Y, T4.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, T37.W, literal.y,
+; CM-NEXT:    -65536(nan), 255(3.573311e-43)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T4.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHL * T1.W, T37.W, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T4.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T5.X,
+; CM-NEXT:     AND_INT * T0.Z, PV.Y, literal.x,
+; CM-NEXT:    -65536(nan), 0(0.000000e+00)
+; CM-NEXT:    ALU clause starting at 118:
+; CM-NEXT:     BFE_UINT * T1.W, T37.W, literal.x, T0.W,
 ; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     LSHR T18.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT * T17.Z, T12.Y, literal.y,
-; CM-NEXT:    2(2.802597e-45), 255(3.573311e-43)
-; CM-NEXT:     AND_INT * T17.X, T12.X, literal.x,
-; CM-NEXT:    255(3.573311e-43), 0(0.000000e+00)
-; CM-NEXT:     LSHR * T12.X, KC0[2].Y, literal.x,
+; CM-NEXT:     OR_INT * T1.W, T0.Z, PV.W,
+; CM-NEXT:     MOV * T5.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T1.W, T37.W, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T37.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T5.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T32.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, T36.X, literal.y,
+; CM-NEXT:    -65536(nan), 255(3.573311e-43)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T32.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHL * T1.W, T36.X, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T32.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T33.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     BFE_UINT * T1.W, T36.X, literal.y, T0.W,
+; CM-NEXT:    -65536(nan), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T33.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T1.W, T36.X, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T38.Y, PV.Z, PV.W,
+; CM-NEXT:     MOV T33.X, PV.Y,
+; CM-NEXT:     MOV * T0.Y, T28.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, T36.Y, literal.y,
+; CM-NEXT:    -65536(nan), 255(3.573311e-43)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T28.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHL * T1.W, T36.Y, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T28.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T29.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     BFE_UINT * T1.W, T36.Y, literal.y, T0.W,
+; CM-NEXT:    -65536(nan), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T29.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T1.W, T36.Y, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T38.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T29.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T24.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, T36.Z, literal.y,
+; CM-NEXT:    -65536(nan), 255(3.573311e-43)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T24.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHL * T1.W, T36.Z, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T24.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T25.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     BFE_UINT * T1.W, T36.Z, literal.y, T0.W,
+; CM-NEXT:    -65536(nan), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T25.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T1.W, T36.Z, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T36.Y, PV.Z, PV.W,
+; CM-NEXT:     MOV T25.X, PV.Y,
+; CM-NEXT:     MOV * T0.Y, T20.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, T36.W, literal.y,
+; CM-NEXT:    -65536(nan), 255(3.573311e-43)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T20.X, PV.W,
+; CM-NEXT:    ALU clause starting at 220:
+; CM-NEXT:     MOV T0.Y, T20.X,
+; CM-NEXT:     LSHL * T1.W, T36.W, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT:     OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T20.X, PV.W,
+; CM-NEXT:     MOV * T0.Y, T21.X,
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     BFE_UINT * T0.W, T36.W, literal.y, T0.W,
+; CM-NEXT:    -65536(nan), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T21.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
+; CM-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; CM-NEXT:     LSHR T39.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    2(2.802597e-45), 48(6.726233e-44)
+; CM-NEXT:     LSHR T40.X, PV.W, literal.x,
+; CM-NEXT:     LSHR * T0.W, T36.W, literal.y,
+; CM-NEXT:    2(2.802597e-45), 8(1.121039e-44)
+; CM-NEXT:     LSHR T41.X, KC0[2].Y, literal.x,
+; CM-NEXT:     AND_INT T0.Y, T0.Y, literal.y,
+; CM-NEXT:     AND_INT T0.Z, PV.W, literal.z,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.w,
+; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; CM-NEXT:    16711680(2.341805e-38), 16(2.242078e-44)
+; CM-NEXT:     LSHR T42.X, PV.W, literal.x,
+; CM-NEXT:     OR_INT * T36.W, PV.Y, PV.Z,
 ; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT:     MOV * T21.X, PV.W,
+; CM-NEXT:     MOV T35.X, T16.X,
+; CM-NEXT:     MOV * T35.Z, T12.X, BS:VEC_120/SCL_212
+; CM-NEXT:     MOV T37.X, T8.X,
+; CM-NEXT:     MOV * T37.Z, T4.X, BS:VEC_120/SCL_212
+; CM-NEXT:     MOV T38.X, T32.X,
+; CM-NEXT:     MOV * T38.Z, T28.X, BS:VEC_120/SCL_212
+; CM-NEXT:     MOV T36.X, T24.X,
+; CM-NEXT:     MOV * T36.Z, T20.X, BS:VEC_120/SCL_212
   %load = load <32 x i8>, ptr addrspace(1) %in
   %ext = zext <32 x i8> %load to <32 x i16>
   store <32 x i16> %ext, ptr addrspace(1) %out
@@ -11717,118 +12983,659 @@ define amdgpu_kernel void @global_sextload_v32i8_to_v32i16(ptr addrspace(1) %out
 ;
 ; EG-LABEL: global_sextload_v32i8_to_v32i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 1 @8
-; EG-NEXT:    ALU 39, @13, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1
+; EG-NEXT:    ALU 1, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 1 @10
+; EG-NEXT:    ALU 104, @16, KC0[], KC1[]
+; EG-NEXT:    ALU 104, @121, KC0[], KC1[]
+; EG-NEXT:    ALU 95, @226, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T42.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 1
 ; EG-NEXT:    CF_END
-; EG-NEXT:    Fetch clause starting at 8:
-; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
-; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
-; EG-NEXT:    ALU clause starting at 12:
-; EG-NEXT:     MOV * T11.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 13:
-; EG-NEXT:     LSHR T13.X, KC0[2].Y, literal.x,
-; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
-; EG-NEXT:     LSHR T14.X, PV.W, literal.x,
-; EG-NEXT:     BFE_INT * T15.Z, T11.Y, 0.0, literal.y,
-; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
-; EG-NEXT:     BFE_INT T15.X, T11.X, 0.0, literal.x,
-; EG-NEXT:     LSHR T0.Y, T12.W, literal.x,
-; EG-NEXT:     BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT:     LSHR T0.W, T12.Y, literal.x,
-; EG-NEXT:     LSHR * T1.W, T11.Y, literal.x,
+; EG-NEXT:    Fetch clause starting at 10:
+; EG-NEXT:     VTX_READ_128 T37.XYZW, T35.X, 16, #1
+; EG-NEXT:     VTX_READ_128 T35.XYZW, T35.X, 0, #1
+; EG-NEXT:    ALU clause starting at 14:
+; EG-NEXT:     MOV * T0.Y, T16.X,
+; EG-NEXT:     MOV * T35.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 16:
+; EG-NEXT:     BFE_INT * T0.W, T37.X, 0.0, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_INT T16.X, T11.Z, 0.0, literal.x,
-; EG-NEXT:     LSHR T1.Y, T11.W, literal.x,
-; EG-NEXT:     BFE_INT T17.Z, T12.Y, 0.0, literal.x,
-; EG-NEXT:     BFE_INT T15.W, PS, 0.0, literal.x,
-; EG-NEXT:     LSHR * T1.W, T11.X, literal.x,
+; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), -65536(nan)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T16.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T37.X, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_INT T17.X, T12.X, 0.0, literal.x,
-; EG-NEXT:     BFE_INT T15.Y, PS, 0.0, literal.x,
-; EG-NEXT:     BFE_INT T18.Z, T12.W, 0.0, literal.x,
-; EG-NEXT:     BFE_INT T16.W, PV.Y, 0.0, literal.x,
-; EG-NEXT:     LSHR * T1.W, T11.Z, literal.x,
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T16.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T17.X,
+; EG-NEXT:     LSHR * T0.W, T37.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T17.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T37.X, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T36.Y, PV.W, PS,
+; EG-NEXT:     MOV T17.X, PV.Y,
+; EG-NEXT:     MOV T0.Y, T12.X,
+; EG-NEXT:     BFE_INT * T0.W, T37.Y, 0.0, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_INT T18.X, T12.Z, 0.0, literal.x,
-; EG-NEXT:     BFE_INT T16.Y, PS, 0.0, literal.x,
-; EG-NEXT:     LSHR T0.Z, T12.X, literal.x,
-; EG-NEXT:     BFE_INT T17.W, T0.W, 0.0, literal.x,
-; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT:    8(1.121039e-44), 32(4.484155e-44)
-; EG-NEXT:     LSHR T11.X, PS, literal.x,
-; EG-NEXT:     BFE_INT T17.Y, PV.Z, 0.0, literal.y,
-; EG-NEXT:     LSHR T0.Z, T12.Z, literal.y,
-; EG-NEXT:     BFE_INT T18.W, T0.Y, 0.0, literal.y,
-; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
-; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
-; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHR T12.X, PS, literal.x,
-; EG-NEXT:     BFE_INT * T18.Y, PV.Z, 0.0, literal.y,
-; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T12.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T37.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T12.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T13.X,
+; EG-NEXT:     LSHR * T0.W, T37.Y, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T13.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T37.Y, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T36.W, PV.W, PS,
+; EG-NEXT:     MOV T13.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T8.X,
+; EG-NEXT:     BFE_INT * T0.W, T37.Z, 0.0, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T37.Z, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T8.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T9.X,
+; EG-NEXT:     LSHR * T0.W, T37.Z, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T9.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T37.Z, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:    ALU clause starting at 121:
+; EG-NEXT:     OR_INT * T37.Y, T1.W, T0.W,
+; EG-NEXT:     MOV T9.X, PV.Y,
+; EG-NEXT:     MOV T0.Y, T4.X,
+; EG-NEXT:     BFE_INT * T0.W, T37.W, 0.0, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T37.W, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T4.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T5.X,
+; EG-NEXT:     LSHR * T0.W, T37.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T37.W, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T37.W, PV.W, PS,
+; EG-NEXT:     MOV T5.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T32.X,
+; EG-NEXT:     BFE_INT * T0.W, T35.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T32.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T35.X, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T32.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T33.X,
+; EG-NEXT:     LSHR * T0.W, T35.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T33.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T35.X, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T38.Y, PV.W, PS,
+; EG-NEXT:     MOV T33.X, PV.Y,
+; EG-NEXT:     MOV T0.Y, T28.X,
+; EG-NEXT:     BFE_INT * T0.W, T35.Y, 0.0, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T28.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T35.Y, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T28.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T29.X,
+; EG-NEXT:     LSHR * T0.W, T35.Y, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T29.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T35.Y, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:    ALU clause starting at 226:
+; EG-NEXT:     AND_INT T1.W, T0.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, T0.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T38.W, PV.W, PS,
+; EG-NEXT:     MOV T29.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T24.X,
+; EG-NEXT:     BFE_INT * T0.W, T35.Z, 0.0, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T24.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T35.Z, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T24.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T25.X,
+; EG-NEXT:     LSHR * T0.W, T35.Z, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T25.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ASHR * T0.W, T35.Z, literal.x,
+; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     OR_INT * T35.Y, PV.W, PS,
+; EG-NEXT:     MOV T25.X, PV.Y,
+; EG-NEXT:     MOV T0.Y, T20.X,
+; EG-NEXT:     BFE_INT * T0.W, T35.W, 0.0, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV * T20.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T35.W, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T20.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T21.X,
+; EG-NEXT:     LSHR * T0.W, T35.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), -65536(nan)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T21.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
+; EG-NEXT:     LSHR * T40.X, KC0[2].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     ASHR T0.W, T35.W, literal.x,
+; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT:    24(3.363116e-44), 48(6.726233e-44)
+; EG-NEXT:     LSHR T41.X, PS, literal.x,
+; EG-NEXT:     AND_INT T0.Z, T0.Y, literal.y,
+; EG-NEXT:     LSHL T0.W, PV.W, literal.z,
+; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.w,
+; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT:    16(2.242078e-44), 32(4.484155e-44)
+; EG-NEXT:     LSHR T42.X, PS, literal.x,
+; EG-NEXT:     OR_INT * T35.W, PV.Z, PV.W,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T21.X, PV.W,
+; EG-NEXT:     MOV * T36.X, T16.X,
+; EG-NEXT:     MOV * T36.Z, T12.X,
+; EG-NEXT:     MOV T37.X, T8.X,
+; EG-NEXT:     MOV T37.Z, T4.X, BS:VEC_120/SCL_212
+; EG-NEXT:     MOV * T38.X, T32.X,
+; EG-NEXT:     MOV * T38.Z, T28.X,
+; EG-NEXT:     MOV T35.X, T24.X,
+; EG-NEXT:     MOV * T35.Z, T20.X, BS:VEC_120/SCL_212
 ;
 ; CM-LABEL: global_sextload_v32i8_to_v32i16:
 ; CM:       ; %bb.0:
-; CM-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    TEX 1 @8
-; CM-NEXT:    ALU 40, @13, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T17, T11.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T12, T18.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T16, T14.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T15, T13.X
+; CM-NEXT:    ALU 1, @14, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    TEX 1 @10
+; CM-NEXT:    ALU 104, @16, KC0[], KC1[]
+; CM-NEXT:    ALU 104, @121, KC0[], KC1[]
+; CM-NEXT:    ALU 95, @226, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T35, T42.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T38, T41.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T37, T40.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T36, T39.X
 ; CM-NEXT:    CF_END
-; CM-NEXT:    Fetch clause starting at 8:
-; CM-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
-; CM-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
-; CM-NEXT:    ALU clause starting at 12:
-; CM-NEXT:     MOV * T11.X, KC0[2].Z,
-; CM-NEXT:    ALU clause starting at 13:
+; CM-NEXT:    Fetch clause starting at 10:
+; CM-NEXT:     VTX_READ_128 T37.XYZW, T35.X, 16, #1
+; CM-NEXT:     VTX_READ_128 T35.XYZW, T35.X, 0, #1
+; CM-NEXT:    ALU clause starting at 14:
+; CM-NEXT:     MOV * T0.Y, T16.X,
+; CM-NEXT:     MOV * T35.X, KC0[2].Z,
+; CM-NEXT:    ALU clause starting at 16:
+; CM-NEXT:     BFE_INT * T0.W, T37.X, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.W, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), -65536(nan)
+; CM-NEXT:     OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT:     MOV * T16.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T0.W, T37.X, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T16.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T17.X,
+; CM-NEXT:     LSHR * T0.W, T37.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T17.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     ASHR * T0.W, T37.X, literal.x,
+; CM-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T36.Y, PV.Z, PV.W,
+; CM-NEXT:     MOV T17.X, PV.Y,
+; CM-NEXT:     MOV T0.Y, T12.X,
+; CM-NEXT:     BFE_INT * T0.W, T37.Y, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T12.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T0.W, T37.Y, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T12.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T13.X,
+; CM-NEXT:     LSHR * T0.W, T37.Y, literal.x,
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T13.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     ASHR * T0.W, T37.Y, literal.x,
+; CM-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T36.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T13.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T8.X,
+; CM-NEXT:     BFE_INT * T0.W, T37.Z, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T8.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T0.W, T37.Z, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T8.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T9.X,
+; CM-NEXT:     LSHR * T0.W, T37.Z, literal.x,
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T9.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     ASHR * T0.W, T37.Z, literal.x,
+; CM-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:    ALU clause starting at 121:
+; CM-NEXT:     OR_INT * T37.Y, T0.Z, T0.W,
+; CM-NEXT:     MOV T9.X, PV.Y,
+; CM-NEXT:     MOV T0.Y, T4.X,
+; CM-NEXT:     BFE_INT * T0.W, T37.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T4.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T0.W, T37.W, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T4.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T5.X,
+; CM-NEXT:     LSHR * T0.W, T37.W, literal.x,
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T5.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     ASHR * T0.W, T37.W, literal.x,
+; CM-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T37.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T5.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T32.X,
+; CM-NEXT:     BFE_INT * T0.W, T35.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T32.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T0.W, T35.X, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T32.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T33.X,
+; CM-NEXT:     LSHR * T0.W, T35.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T33.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     ASHR * T0.W, T35.X, literal.x,
+; CM-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T38.Y, PV.Z, PV.W,
+; CM-NEXT:     MOV T33.X, PV.Y,
+; CM-NEXT:     MOV T0.Y, T28.X,
+; CM-NEXT:     BFE_INT * T0.W, T35.Y, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T28.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T0.W, T35.Y, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T28.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T29.X,
+; CM-NEXT:     LSHR * T0.W, T35.Y, literal.x,
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T29.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     ASHR * T0.W, T35.Y, literal.x,
+; CM-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT:    ALU clause starting at 226:
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, T0.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T38.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T29.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T24.X,
+; CM-NEXT:     BFE_INT * T0.W, T35.Z, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T24.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T0.W, T35.Z, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T24.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T25.X,
+; CM-NEXT:     LSHR * T0.W, T35.Z, literal.x,
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T25.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     ASHR * T0.W, T35.Z, literal.x,
+; CM-NEXT:    24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T35.Y, PV.Z, PV.W,
+; CM-NEXT:     MOV T25.X, PV.Y,
+; CM-NEXT:     MOV T0.Y, T20.X,
+; CM-NEXT:     BFE_INT * T0.W, T35.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T20.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
+; CM-NEXT:     LSHR * T0.W, T35.W, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV T20.X, PV.W,
+; CM-NEXT:     MOV T0.Y, T21.X,
+; CM-NEXT:     LSHR * T0.W, T35.W, literal.x,
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT:     AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT:    -65536(nan), 65535(9.183409e-41)
+; CM-NEXT:     OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT:     MOV * T21.X, PV.W,
+; CM-NEXT:     MOV T0.Y, PV.X,
 ; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
-; CM-NEXT:    48(6.726233e-44), 0(0.000000e+00)
-; CM-NEXT:     LSHR T13.X, PV.W, literal.x,
-; CM-NEXT:     LSHR T0.Y, T11.Y, literal.y,
-; CM-NEXT:     LSHR T0.Z, T11.Z, literal.y,
-; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
-; CM-NEXT:    2(2.802597e-45), 8(1.121039e-44)
 ; CM-NEXT:    32(4.484155e-44), 0(0.000000e+00)
-; CM-NEXT:     LSHR T14.X, PV.W, literal.x,
-; CM-NEXT:     LSHR T1.Y, T11.W, literal.y,
-; CM-NEXT:     BFE_INT T15.Z, T12.W, 0.0, literal.y, BS:VEC_120/SCL_212
-; CM-NEXT:     LSHR * T0.W, T12.X, literal.y,
-; CM-NEXT:    2(2.802597e-45), 8(1.121039e-44)
-; CM-NEXT:     BFE_INT T15.X, T12.Z, 0.0, literal.x,
-; CM-NEXT:     LSHR T2.Y, T12.Y, literal.x,
-; CM-NEXT:     BFE_INT T16.Z, T12.Y, 0.0, literal.x,
-; CM-NEXT:     LSHR * T1.W, T12.W, literal.x,
-; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_INT T16.X, T12.X, 0.0, literal.x,
-; CM-NEXT:     LSHR T3.Y, T12.Z, literal.x,
-; CM-NEXT:     BFE_INT T12.Z, T11.W, 0.0, literal.x,
-; CM-NEXT:     BFE_INT * T15.W, PV.W, 0.0, literal.x,
-; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_INT T12.X, T11.Z, 0.0, literal.x,
-; CM-NEXT:     BFE_INT T15.Y, PV.Y, 0.0, literal.x,
-; CM-NEXT:     BFE_INT T17.Z, T11.Y, 0.0, literal.x,
-; CM-NEXT:     BFE_INT * T16.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212
-; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_INT T17.X, T11.X, 0.0, literal.x,
-; CM-NEXT:     BFE_INT T16.Y, T0.W, 0.0, literal.x,
-; CM-NEXT:     ADD_INT T1.Z, KC0[2].Y, literal.y,
-; CM-NEXT:     BFE_INT * T12.W, T1.Y, 0.0, literal.x,
-; CM-NEXT:    8(1.121039e-44), 16(2.242078e-44)
-; CM-NEXT:     LSHR T18.X, PV.Z, literal.x,
-; CM-NEXT:     BFE_INT T12.Y, T0.Z, 0.0, literal.y,
-; CM-NEXT:     LSHR T0.Z, T11.X, literal.y,
-; CM-NEXT:     BFE_INT * T17.W, T0.Y, 0.0, literal.y,
-; CM-NEXT:    2(2.802597e-45), 8(1.121039e-44)
-; CM-NEXT:     LSHR T11.X, KC0[2].Y, literal.x,
-; CM-NEXT:     BFE_INT * T17.Y, PV.Z, 0.0, literal.y,
-; CM-NEXT:    2(2.802597e-45), 8(1.121039e-44)
+; CM-NEXT:     LSHR T39.X, PV.W, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    2(2.802597e-45), 48(6.726233e-44)
+; CM-NEXT:     LSHR T40.X, PV.W, literal.x,
+; CM-NEXT:     ASHR * T0.W, T35.W, literal.y,
+; CM-NEXT:    2(2.802597e-45), 24(3.363116e-44)
+; CM-NEXT:     LSHR T41.X, KC0[2].Y, literal.x,
+; CM-NEXT:     AND_INT T0.Y, T0.Y, literal.y,
+; CM-NEXT:     LSHL T0.Z, PV.W, literal.z,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
+; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     LSHR T42.X, PV.W, literal.x,
+; CM-NEXT:     OR_INT * T35.W, PV.Y, PV.Z,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT:     MOV * T21.X, PV.W,
+; CM-NEXT:     MOV T36.X, T16.X,
+; CM-NEXT:     MOV * T36.Z, T12.X, BS:VEC_120/SCL_212
+; CM-NEXT:     MOV T37.X, T8.X,
+; CM-NEXT:     MOV * T37.Z, T4.X, BS:VEC_120/SCL_212
+; CM-NEXT:     MOV T38.X, T32.X,
+; CM-NEXT:     MOV * T38.Z, T28.X, BS:VEC_120/SCL_212
+; CM-NEXT:     MOV T35.X, T24.X,
+; CM-NEXT:     MOV * T35.Z, T20.X, BS:VEC_120/SCL_212
   %load = load <32 x i8>, ptr addrspace(1) %in
   %ext = sext <32 x i8> %load to <32 x i16>
   store <32 x i16> %ext, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
index 8dcecfe291177..a209de78cd299 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
@@ -151,19 +151,27 @@ define amdgpu_kernel void @local_load_v3i16(ptr addrspace(3) %out, ptr addrspace
 ;
 ; EG-LABEL: local_load_v3i16:
 ; EG:       ; %bb.0: ; %entry
-; EG-NEXT:    ALU 11, @2, KC0[CB0:0-32], KC1[]
-; EG-NEXT:     ADD_INT * T0.W, KC0[2].Z, literal.x,
-; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
-; EG-NEXT:     LDS_USHORT_READ_RET * OQAP, T0.W
-; EG-NEXT:     MOV T0.X, OQAP,
+; EG-NEXT:    ALU 19, @2, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:     MOV * T0.W, KC0[2].Z,
 ; EG-NEXT:     LDS_USHORT_READ_RET * OQAP, T0.W
 ; EG-NEXT:     MOV T0.Y, OQAP,
-; EG-NEXT:     MOV * T0.W, KC0[2].Y,
-; EG-NEXT:     LDS_WRITE * T0.W, T0.Y,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Z, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     LDS_USHORT_READ_RET * OQAP, T0.W
+; EG-NEXT:     MOV * T0.Z, OQAP,
+; EG-NEXT:     LSHL T0.Z, PV.Z, literal.x,
+; EG-NEXT:     AND_INT T0.W, T0.Y, literal.y,
+; EG-NEXT:     ADD_INT * T1.W, KC0[2].Z, literal.z,
+; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
+; EG-NEXT:     LDS_USHORT_READ_RET * OQAP, T1.W
+; EG-NEXT:     MOV T0.Y, OQAP,
+; EG-NEXT:     OR_INT T0.W, T0.Z, T0.W,
+; EG-NEXT:     MOV * T1.W, KC0[2].Y,
+; EG-NEXT:     LDS_WRITE * T1.W, T0.W,
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
 ; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
-; EG-NEXT:     LDS_SHORT_WRITE * T0.W, T0.X,
+; EG-NEXT:     LDS_SHORT_WRITE * T0.W, T0.Y,
 ; EG-NEXT:    RETURN
 entry:
   %ld = load <3 x i16>, ptr addrspace(3) %in
diff --git a/llvm/test/CodeGen/AMDGPU/min.ll b/llvm/test/CodeGen/AMDGPU/min.ll
index bf2ddc178ac63..dbf402b711944 100644
--- a/llvm/test/CodeGen/AMDGPU/min.ll
+++ b/llvm/test/CodeGen/AMDGPU/min.ll
@@ -871,30 +871,81 @@ define amdgpu_kernel void @s_test_imin_sle_v2i16(ptr addrspace(1) %out, <2 x i16
 define amdgpu_kernel void @s_test_imin_sle_v4i16(ptr addrspace(1) %out, <4 x i16> %a, <4 x i16> %b) #0 {
 ; EG-LABEL: s_test_imin_sle_v4i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @14, KC0[], KC1[]
-; EG-NEXT:    TEX 3 @6
-; EG-NEXT:    ALU 9, @15, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; EG-NEXT:    ALU 1, @28, KC0[], KC1[]
+; EG-NEXT:    TEX 1 @12
+; EG-NEXT:    ALU 9, @30, KC0[], KC1[]
+; EG-NEXT:    TEX 1 @16
+; EG-NEXT:    ALU 10, @40, KC0[], KC1[]
+; EG-NEXT:    TEX 1 @20
+; EG-NEXT:    ALU 10, @51, KC0[], KC1[]
+; EG-NEXT:    TEX 1 @24
+; EG-NEXT:    ALU 11, @62, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XY, T5.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
-; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_16 T1.X, T0.X, 46, #3
-; EG-NEXT:     VTX_READ_16 T2.X, T0.X, 52, #3
-; EG-NEXT:     VTX_READ_16 T3.X, T0.X, 44, #3
-; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 54, #3
-; EG-NEXT:    ALU clause starting at 14:
-; EG-NEXT:     MOV * T0.X, 0.0,
-; EG-NEXT:    ALU clause starting at 15:
-; EG-NEXT:     BFE_INT T0.Z, T1.X, 0.0, literal.x,
-; EG-NEXT:     BFE_INT * T0.W, T0.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:    Fetch clause starting at 12:
+; EG-NEXT:     VTX_READ_16 T6.X, T5.X, 50, #3
+; EG-NEXT:     VTX_READ_16 T7.X, T5.X, 58, #3
+; EG-NEXT:    Fetch clause starting at 16:
+; EG-NEXT:     VTX_READ_16 T6.X, T5.X, 48, #3
+; EG-NEXT:     VTX_READ_16 T7.X, T5.X, 56, #3
+; EG-NEXT:    Fetch clause starting at 20:
+; EG-NEXT:     VTX_READ_16 T6.X, T5.X, 46, #3
+; EG-NEXT:     VTX_READ_16 T7.X, T5.X, 54, #3
+; EG-NEXT:    Fetch clause starting at 24:
+; EG-NEXT:     VTX_READ_16 T6.X, T5.X, 44, #3
+; EG-NEXT:     VTX_READ_16 T5.X, T5.X, 52, #3
+; EG-NEXT:    ALU clause starting at 28:
+; EG-NEXT:     MOV * T0.Y, T3.X,
+; EG-NEXT:     MOV * T5.X, 0.0,
+; EG-NEXT:    ALU clause starting at 30:
+; EG-NEXT:     BFE_INT T0.Z, T6.X, 0.0, literal.x,
+; EG-NEXT:     BFE_INT * T0.W, T7.X, 0.0, literal.x, BS:VEC_120/SCL_212
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     MIN_INT T0.Y, PV.Z, PV.W,
-; EG-NEXT:     BFE_INT T0.Z, T3.X, 0.0, literal.x,
-; EG-NEXT:     BFE_INT * T0.W, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:     MIN_INT * T0.W, PV.Z, PV.W,
+; EG-NEXT:     LSHL T0.W, PV.W, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T3.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, PV.X,
+; EG-NEXT:    ALU clause starting at 40:
+; EG-NEXT:     BFE_INT T0.Z, T6.X, 0.0, literal.x,
+; EG-NEXT:     BFE_INT * T0.W, T7.X, 0.0, literal.x, BS:VEC_120/SCL_212
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     MIN_INT T0.X, PV.Z, PV.W,
-; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
-; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MIN_INT T0.W, PV.Z, PV.W,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T3.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T2.X,
+; EG-NEXT:    ALU clause starting at 51:
+; EG-NEXT:     BFE_INT T0.Z, T6.X, 0.0, literal.x,
+; EG-NEXT:     BFE_INT * T0.W, T7.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     MIN_INT T0.W, PV.Z, PV.W,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T2.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, PV.X,
+; EG-NEXT:    ALU clause starting at 62:
+; EG-NEXT:     BFE_INT T0.Z, T6.X, 0.0, literal.x,
+; EG-NEXT:     BFE_INT * T0.W, T5.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     MIN_INT * T0.W, PV.Z, PV.W,
+; EG-NEXT:     LSHR T5.X, KC0[2].Y, literal.x,
+; EG-NEXT:     AND_INT T1.W, T0.Y, literal.y,
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.z,
+; EG-NEXT:    2(2.802597e-45), -65536(nan)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T6.X, PV.W, PS,
+; EG-NEXT:     MOV T2.X, PV.X,
+; EG-NEXT:     MOV * T6.Y, T3.X,
 ;
 ; CI-LABEL: s_test_imin_sle_v4i16:
 ; CI:       ; %bb.0:
@@ -1911,40 +1962,49 @@ define amdgpu_kernel void @v_test_umin_ule_v3i32(ptr addrspace(1) %out, ptr addr
 define amdgpu_kernel void @v_test_umin_ule_v3i16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
 ; EG-LABEL: v_test_umin_ule_v3i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 3, @14, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 3 @6
-; EG-NEXT:    ALU 17, @18, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T2.X, 0
-; EG-NEXT:    MEM_RAT MSKOR T4.XW, T0.X
+; EG-NEXT:    ALU 3, @20, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 1 @8
+; EG-NEXT:    ALU 11, @24, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 3 @12
+; EG-NEXT:    ALU 8, @36, KC0[], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.X, T8.X, 0
+; EG-NEXT:    MEM_RAT MSKOR T7.XW, T0.X
 ; EG-NEXT:    CF_END
-; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_16 T2.X, T1.X, 0, #1
-; EG-NEXT:     VTX_READ_16 T3.X, T0.X, 0, #1
-; EG-NEXT:     VTX_READ_16 T1.X, T1.X, 4, #1
-; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 4, #1
-; EG-NEXT:    ALU clause starting at 14:
+; EG-NEXT:    Fetch clause starting at 8:
+; EG-NEXT:     VTX_READ_16 T7.X, T6.X, 4, #1
+; EG-NEXT:     VTX_READ_16 T8.X, T0.X, 4, #1
+; EG-NEXT:    Fetch clause starting at 12:
+; EG-NEXT:     VTX_READ_16 T8.X, T6.X, 0, #1
+; EG-NEXT:     VTX_READ_16 T9.X, T0.X, 0, #1
+; EG-NEXT:     VTX_READ_16 T6.X, T6.X, 2, #1
+; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 2, #1
+; EG-NEXT:    ALU clause starting at 20:
 ; EG-NEXT:     LSHL * T0.W, T0.X, literal.x,
 ; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
 ; EG-NEXT:     ADD_INT T0.X, KC0[2].Z, PV.W,
-; EG-NEXT:     ADD_INT * T1.X, KC0[2].W, PV.W,
-; EG-NEXT:    ALU clause starting at 18:
+; EG-NEXT:     ADD_INT * T6.X, KC0[2].W, PV.W,
+; EG-NEXT:    ALU clause starting at 24:
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, T0.W,
 ; EG-NEXT:     ADD_INT * T1.W, PV.W, literal.x,
 ; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
 ; EG-NEXT:     AND_INT * T2.W, PV.W, literal.x,
 ; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
 ; EG-NEXT:     LSHL T2.W, PV.W, literal.x,
-; EG-NEXT:     MIN_UINT * T3.W, T0.X, T1.X,
+; EG-NEXT:     MIN_UINT * T3.W, T8.X, T7.X,
 ; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
-; EG-NEXT:     LSHL T4.X, PS, PV.W,
-; EG-NEXT:     LSHL * T4.W, literal.x, PV.W,
+; EG-NEXT:     LSHL T7.X, PS, PV.W,
+; EG-NEXT:     LSHL * T7.W, literal.x, PV.W,
 ; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT:     MOV T4.Y, 0.0,
-; EG-NEXT:     MOV * T4.Z, 0.0,
+; EG-NEXT:     MOV * T7.Y, 0.0,
+; EG-NEXT:    ALU clause starting at 36:
+; EG-NEXT:     MOV T7.Z, 0.0,
+; EG-NEXT:     MIN_UINT * T2.W, T0.X, T6.X,
 ; EG-NEXT:     LSHR T0.X, T1.W, literal.x,
-; EG-NEXT:     MIN_UINT * T1.X, T3.X, T2.X,
-; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT:     LSHR * T2.X, T0.W, literal.x,
+; EG-NEXT:     LSHL T1.W, PV.W, literal.y,
+; EG-NEXT:     MIN_UINT * T2.W, T9.X, T8.X,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:     OR_INT T6.X, PV.W, PS,
+; EG-NEXT:     LSHR * T8.X, T0.W, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 ;
 ; CI-LABEL: v_test_umin_ule_v3i16:
@@ -3087,46 +3147,142 @@ define amdgpu_kernel void @s_test_umin_ult_v8i32(ptr addrspace(1) %out, <8 x i32
 define amdgpu_kernel void @s_test_umin_ult_v8i16(ptr addrspace(1) %out, <8 x i16> %a, <8 x i16> %b) #0 {
 ; EG-LABEL: s_test_umin_ult_v8i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @24, KC0[], KC1[]
-; EG-NEXT:    TEX 2 @8
-; EG-NEXT:    ALU 2, @25, KC0[], KC1[]
-; EG-NEXT:    TEX 4 @14
-; EG-NEXT:    ALU 14, @28, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
+; EG-NEXT:    ALU 1, @52, KC0[], KC1[]
+; EG-NEXT:    TEX 1 @20
+; EG-NEXT:    ALU 9, @54, KC0[], KC1[]
+; EG-NEXT:    TEX 1 @24
+; EG-NEXT:    ALU 8, @64, KC0[], KC1[]
+; EG-NEXT:    TEX 1 @28
+; EG-NEXT:    ALU 10, @73, KC0[], KC1[]
+; EG-NEXT:    TEX 1 @32
+; EG-NEXT:    ALU 8, @84, KC0[], KC1[]
+; EG-NEXT:    TEX 1 @36
+; EG-NEXT:    ALU 10, @93, KC0[], KC1[]
+; EG-NEXT:    TEX 1 @40
+; EG-NEXT:    ALU 8, @104, KC0[], KC1[]
+; EG-NEXT:    TEX 1 @44
+; EG-NEXT:    ALU 10, @113, KC0[], KC1[]
+; EG-NEXT:    TEX 1 @48
+; EG-NEXT:    ALU 10, @124, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
-; EG-NEXT:    Fetch clause starting at 8:
-; EG-NEXT:     VTX_READ_16 T1.X, T0.X, 62, #3
-; EG-NEXT:     VTX_READ_16 T2.X, T0.X, 60, #3
-; EG-NEXT:     VTX_READ_16 T3.X, T0.X, 78, #3
-; EG-NEXT:    Fetch clause starting at 14:
-; EG-NEXT:     VTX_READ_16 T1.X, T0.X, 68, #3
-; EG-NEXT:     VTX_READ_16 T3.X, T0.X, 52, #3
-; EG-NEXT:     VTX_READ_16 T4.X, T0.X, 70, #3
-; EG-NEXT:     VTX_READ_16 T5.X, T0.X, 54, #3
-; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 76, #3
-; EG-NEXT:    ALU clause starting at 24:
-; EG-NEXT:     MOV * T0.X, 0.0,
-; EG-NEXT:    ALU clause starting at 25:
-; EG-NEXT:     AND_INT T0.W, T1.X, literal.x,
-; EG-NEXT:     AND_INT * T1.W, T3.X, literal.x,
+; EG-NEXT:    Fetch clause starting at 20:
+; EG-NEXT:     VTX_READ_16 T8.X, T7.X, 66, #3
+; EG-NEXT:     VTX_READ_16 T9.X, T7.X, 82, #3
+; EG-NEXT:    Fetch clause starting at 24:
+; EG-NEXT:     VTX_READ_16 T8.X, T7.X, 64, #3
+; EG-NEXT:     VTX_READ_16 T9.X, T7.X, 80, #3
+; EG-NEXT:    Fetch clause starting at 28:
+; EG-NEXT:     VTX_READ_16 T8.X, T7.X, 62, #3
+; EG-NEXT:     VTX_READ_16 T9.X, T7.X, 78, #3
+; EG-NEXT:    Fetch clause starting at 32:
+; EG-NEXT:     VTX_READ_16 T8.X, T7.X, 60, #3
+; EG-NEXT:     VTX_READ_16 T9.X, T7.X, 76, #3
+; EG-NEXT:    Fetch clause starting at 36:
+; EG-NEXT:     VTX_READ_16 T8.X, T7.X, 58, #3
+; EG-NEXT:     VTX_READ_16 T9.X, T7.X, 74, #3
+; EG-NEXT:    Fetch clause starting at 40:
+; EG-NEXT:     VTX_READ_16 T8.X, T7.X, 56, #3
+; EG-NEXT:     VTX_READ_16 T9.X, T7.X, 72, #3
+; EG-NEXT:    Fetch clause starting at 44:
+; EG-NEXT:     VTX_READ_16 T8.X, T7.X, 54, #3
+; EG-NEXT:     VTX_READ_16 T9.X, T7.X, 70, #3
+; EG-NEXT:    Fetch clause starting at 48:
+; EG-NEXT:     VTX_READ_16 T8.X, T7.X, 52, #3
+; EG-NEXT:     VTX_READ_16 T7.X, T7.X, 68, #3
+; EG-NEXT:    ALU clause starting at 52:
+; EG-NEXT:     MOV * T0.Y, T3.X,
+; EG-NEXT:     MOV * T7.X, 0.0,
+; EG-NEXT:    ALU clause starting at 54:
+; EG-NEXT:     AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T9.X, literal.x,
 ; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT:    ALU clause starting at 28:
-; EG-NEXT:     AND_INT T0.Z, T2.X, literal.x,
-; EG-NEXT:     AND_INT T2.W, T0.X, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT:     MIN_UINT * T0.W, T0.W, T1.W,
+; EG-NEXT:     MIN_UINT * T0.W, PV.W, PS,
+; EG-NEXT:     LSHL T0.W, PV.W, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T3.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, PV.X,
+; EG-NEXT:    ALU clause starting at 64:
+; EG-NEXT:     AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T9.X, literal.x,
 ; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT:     MIN_UINT T0.Z, PV.Z, PV.W,
-; EG-NEXT:     AND_INT T1.W, T5.X, literal.x,
-; EG-NEXT:     AND_INT * T2.W, T4.X, literal.x,
+; EG-NEXT:     AND_INT T2.W, T0.Y, literal.x,
+; EG-NEXT:     MIN_UINT * T0.W, PV.W, PS,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T3.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T2.X,
+; EG-NEXT:    ALU clause starting at 73:
+; EG-NEXT:     AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T9.X, literal.x,
 ; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT:     MIN_UINT T0.Y, PV.W, PS,
-; EG-NEXT:     AND_INT T1.W, T3.X, literal.x,
-; EG-NEXT:     AND_INT * T2.W, T1.X, literal.x,
+; EG-NEXT:     MIN_UINT T0.W, PV.W, PS,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.x,
 ; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT:     MIN_UINT T0.X, PV.W, PS,
-; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
-; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T2.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, PV.X,
+; EG-NEXT:    ALU clause starting at 84:
+; EG-NEXT:     AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T9.X, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, T0.Y, literal.x,
+; EG-NEXT:     MIN_UINT * T0.W, PV.W, PS,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T7.Z, PV.W, PS,
+; EG-NEXT:     MOV T2.X, PV.Z,
+; EG-NEXT:     MOV * T0.Y, T5.X,
+; EG-NEXT:    ALU clause starting at 93:
+; EG-NEXT:     AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T9.X, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     MIN_UINT T0.W, PV.W, PS,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T5.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, PV.X,
+; EG-NEXT:    ALU clause starting at 104:
+; EG-NEXT:     AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T9.X, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, T0.Y, literal.x,
+; EG-NEXT:     MIN_UINT * T0.W, PV.W, PS,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     MOV T5.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, T4.X,
+; EG-NEXT:    ALU clause starting at 113:
+; EG-NEXT:     AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T9.X, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     MIN_UINT T0.W, PV.W, PS,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T4.X, PV.W,
+; EG-NEXT:     MOV * T0.Y, PV.X,
+; EG-NEXT:    ALU clause starting at 124:
+; EG-NEXT:     AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T7.X, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHR T8.X, KC0[2].Y, literal.x,
+; EG-NEXT:     AND_INT T2.W, T0.Y, literal.y,
+; EG-NEXT:     MIN_UINT * T0.W, PV.W, PS,
+; EG-NEXT:    2(2.802597e-45), -65536(nan)
+; EG-NEXT:     OR_INT * T7.X, PV.W, PS,
+; EG-NEXT:     MOV T4.X, PV.X,
+; EG-NEXT:     MOV * T7.W, T3.X,
+; EG-NEXT:     MOV * T7.Y, T5.X,
 ;
 ; CI-LABEL: s_test_umin_ult_v8i16:
 ; CI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll
index 7aa7342ec8706..28330bfc9bb69 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.ll
@@ -681,30 +681,63 @@ define amdgpu_kernel void @shl_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in
 ;
 ; EG-LABEL: shl_v4i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 2, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 3, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 10, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T8.X, 1
+; EG-NEXT:    ALU 42, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XY, T0.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_128 T8.XYZW, T0.X, 0, #1
+; EG-NEXT:     VTX_READ_128 T10.XYZW, T0.X, 0, #1
 ; EG-NEXT:    ALU clause starting at 8:
-; EG-NEXT:     LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT:     MOV T0.Y, T6.X,
+; EG-NEXT:     LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
 ; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
 ; EG-NEXT:     ADD_INT * T0.X, KC0[2].Z, PV.W,
-; EG-NEXT:    ALU clause starting at 11:
-; EG-NEXT:     LSHR T1.W, T8.Z, literal.x,
-; EG-NEXT:     LSHR * T2.W, T8.X, literal.x,
+; EG-NEXT:    ALU clause starting at 12:
+; EG-NEXT:     AND_INT * T1.W, T10.Z, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHL * T1.W, T10.X, PV.W,
+; EG-NEXT:     AND_INT T1.W, PV.W, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T0.Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), -65536(nan)
+; EG-NEXT:     OR_INT * T1.W, PS, PV.W,
+; EG-NEXT:     MOV * T6.X, PV.W,
+; EG-NEXT:     MOV T0.X, PV.X,
+; EG-NEXT:     LSHR T1.W, T10.Z, literal.x,
+; EG-NEXT:     LSHR * T2.W, T10.X, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHL T0.Y, PS, PV.W,
-; EG-NEXT:     AND_INT T1.W, T8.Z, literal.x,
-; EG-NEXT:     AND_INT * T2.W, T8.X, literal.x,
+; EG-NEXT:     LSHL T1.W, PS, PV.W,
+; EG-NEXT:     AND_INT * T2.W, PV.X, literal.x,
 ; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT:     LSHL T0.X, PS, PV.W,
+; EG-NEXT:     LSHL * T1.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, T2.W, PV.W,
+; EG-NEXT:     MOV T6.X, PV.W,
+; EG-NEXT:     MOV * T0.X, T7.X,
+; EG-NEXT:     AND_INT * T1.W, T10.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHL T1.W, T10.Y, PV.W,
+; EG-NEXT:     AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T1.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T1.W, T2.W, PV.W,
+; EG-NEXT:     MOV * T7.X, PV.W,
+; EG-NEXT:     MOV T0.X, PV.X,
+; EG-NEXT:     LSHR T1.W, T10.W, literal.x,
+; EG-NEXT:     LSHR * T2.W, T10.Y, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHL * T1.W, PS, PV.W,
+; EG-NEXT:     AND_INT T0.Z, T0.X, literal.x,
+; EG-NEXT:     LSHL T1.W, PV.W, literal.y,
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, T0.W,
-; EG-NEXT:     LSHR * T8.X, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT:     LSHR T0.X, PS, literal.x,
+; EG-NEXT:     OR_INT * T10.Y, PV.Z, PV.W,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T7.X, PV.Y,
+; EG-NEXT:     MOV * T10.X, T6.X,
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i32 %tid
   %gep.out = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i32 %tid
diff --git a/llvm/test/CodeGen/AMDGPU/sra.ll b/llvm/test/CodeGen/AMDGPU/sra.ll
index 5d169c142b2e9..80c0d0f45eb97 100644
--- a/llvm/test/CodeGen/AMDGPU/sra.ll
+++ b/llvm/test/CodeGen/AMDGPU/sra.ll
@@ -320,28 +320,67 @@ define amdgpu_kernel void @ashr_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %i
 ;
 ; EG-LABEL: ashr_v4i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 10, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XY, T8.X, 1
+; EG-NEXT:    ALU 48, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XY, T9.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
+; EG-NEXT:     VTX_READ_128 T9.XYZW, T9.X, 0, #1
 ; EG-NEXT:    ALU clause starting at 8:
-; EG-NEXT:     MOV * T7.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 9:
-; EG-NEXT:     LSHR T0.Z, T7.X, literal.x,
-; EG-NEXT:     BFE_INT T0.W, T7.X, 0.0, literal.x,
-; EG-NEXT:     AND_INT * T1.W, T7.Z, literal.y,
+; EG-NEXT:     MOV * T0.Y, T6.X,
+; EG-NEXT:     MOV * T9.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     BFE_INT T0.W, T9.X, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T9.Z, literal.y,
 ; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
-; EG-NEXT:     ASHR T7.X, PV.W, PS,
-; EG-NEXT:     BFE_INT T0.W, PV.Z, 0.0, literal.x,
-; EG-NEXT:     LSHR * T1.W, T7.Z, literal.x,
+; EG-NEXT:     ASHR * T0.W, PV.W, PS,
+; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), -65536(nan)
+; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
+; EG-NEXT:     MOV * T6.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T9.X, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     LSHR * T1.W, T9.Z, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     ASHR T0.W, PV.W, PS,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV T6.X, PV.W,
+; EG-NEXT:     MOV T0.Y, T7.X,
+; EG-NEXT:     BFE_INT T0.W, T9.Y, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T9.W, literal.y,
+; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT:     ASHR T0.W, PV.W, PS,
+; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.x,
+; EG-NEXT:    -65536(nan), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT:     MOV * T7.X, PV.W,
+; EG-NEXT:     MOV T0.Y, PV.X,
+; EG-NEXT:     LSHR * T0.W, T9.Y, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:     LSHR * T1.W, T9.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     ASHR T0.W, PV.W, PS,
+; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     LSHR T8.X, KC0[2].Y, literal.x,
-; EG-NEXT:     ASHR * T7.Y, PV.W, PS,
+; EG-NEXT:     LSHR T9.X, KC0[2].Y, literal.x,
+; EG-NEXT:     OR_INT * T10.Y, T1.W, PV.W,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV T7.X, PV.Y,
+; EG-NEXT:     MOV * T10.X, T6.X,
   %b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %in, i16 1
   %a = load <4 x i16>, ptr addrspace(1) %in
   %b = load <4 x i16>, ptr addrspace(1) %b_ptr

>From bb45125b0ef393a433851c7d05f7c87037f5084e Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Fri, 1 Aug 2025 17:07:14 -0700
Subject: [PATCH 3/4] fixup! Update test

---
 llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll
index 4bdd00d94e1c8..ac3cd84694f4c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll
@@ -4,6 +4,29 @@
 define <2 x i8> @fp4(<4 x i4> %0) nounwind {
 ; CHECK-LABEL: fp4:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vslidedown.vi v9, v8, 1
+; CHECK-NEXT:    vmv.x.s a1, v9
+; CHECK-NEXT:    vslidedown.vi v9, v8, 2
+; CHECK-NEXT:    vslidedown.vi v8, v8, 3
+; CHECK-NEXT:    andi a0, a0, 15
+; CHECK-NEXT:    vmv.x.s a2, v9
+; CHECK-NEXT:    andi a1, a1, 15
+; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    or a0, a0, a1
+; CHECK-NEXT:    vmv.x.s a1, v8
+; CHECK-NEXT:    andi a2, a2, 15
+; CHECK-NEXT:    slli a1, a1, 12
+; CHECK-NEXT:    slli a2, a2, 8
+; CHECK-NEXT:    or a1, a2, a1
+; CHECK-NEXT:    or a0, a0, a1
+; CHECK-NEXT:    sh a0, 14(sp)
+; CHECK-NEXT:    addi a0, sp, 14
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %2 = bitcast <4 x i4> %0 to <2 x i8>
   ret <2 x i8> %2

>From efa234fe7e0d8bca8722d6dd434bdc8cefc84705 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Sat, 2 Aug 2025 15:31:14 -0700
Subject: [PATCH 4/4] fixup! Add TODO comment

---
 llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 0bd0599692bcc..a5bd97ace169e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2217,7 +2217,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
 
   switch (getTypeAction(InVT)) {
   case TargetLowering::TypePromoteInteger: {
-    // TODO: Handle big endian
+    // TODO: Handle big endian & vector input type.
     if (OutVT.isVector() && !InVT.isVector() &&
         DAG.getDataLayout().isLittleEndian()) {
       EVT EltVT = OutVT.getVectorElementType();



More information about the llvm-commits mailing list