[llvm] 7ebbbd8 - [DAG] Always use stack to promote bitcast when the source is vector (#151065)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 2 15:32:13 PDT 2025
Author: Min-Yih Hsu
Date: 2025-08-02T15:32:10-07:00
New Revision: 7ebbbd885f759ef5b7ae50a1b07f810296c4967f
URL: https://github.com/llvm/llvm-project/commit/7ebbbd885f759ef5b7ae50a1b07f810296c4967f
DIFF: https://github.com/llvm/llvm-project/commit/7ebbbd885f759ef5b7ae50a1b07f810296c4967f.diff
LOG: [DAG] Always use stack to promote bitcast when the source is vector (#151065)
The optimization introduced by #125637 tried to avoid using the stack to
promote bitcasts with a vector result type. However, it is not correct
when the input type is also a vector. This patch limits that optimization
to scalar-to-vector bitcasts only.
Added:
llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
llvm/test/CodeGen/AMDGPU/ctpop16.ll
llvm/test/CodeGen/AMDGPU/kernel-args.ll
llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
llvm/test/CodeGen/AMDGPU/load-global-i16.ll
llvm/test/CodeGen/AMDGPU/load-global-i8.ll
llvm/test/CodeGen/AMDGPU/load-local-i16.ll
llvm/test/CodeGen/AMDGPU/min.ll
llvm/test/CodeGen/AMDGPU/shl.ll
llvm/test/CodeGen/AMDGPU/sra.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 583a85a332dcd..a5bd97ace169e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2217,8 +2217,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
switch (getTypeAction(InVT)) {
case TargetLowering::TypePromoteInteger: {
- // TODO: Handle big endian
- if (OutVT.isVector() && DAG.getDataLayout().isLittleEndian()) {
+ // TODO: Handle big endian & vector input type.
+ if (OutVT.isVector() && !InVT.isVector() &&
+ DAG.getDataLayout().isLittleEndian()) {
EVT EltVT = OutVT.getVectorElementType();
TypeSize EltSize = EltVT.getSizeInBits();
TypeSize NInSize = NInVT.getSizeInBits();
diff --git a/llvm/test/CodeGen/AMDGPU/ctpop16.ll b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
index 1b9b508137b7f..cefcbddd3e394 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop16.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
@@ -457,27 +457,58 @@ define amdgpu_kernel void @v_ctpop_v4i16(ptr addrspace(1) noalias %out, ptr addr
;
; EG-LABEL: v_ctpop_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 3, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 7, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T6.X, 1
+; EG-NEXT: ALU 37, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XY, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
+; EG-NEXT: VTX_READ_64 T8.XY, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT: MOV T0.Y, T4.X,
+; EG-NEXT: LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W,
-; EG-NEXT: ALU clause starting at 11:
-; EG-NEXT: LSHR * T0.W, T0.X, literal.x,
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: AND_INT * T0.W, T8.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: LSHR * T0.W, T8.X, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.Y, PV.W,
-; EG-NEXT: AND_INT * T0.W, T0.X, literal.x,
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.X, PV.W,
-; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV * T0.X, T5.X,
+; EG-NEXT: AND_INT * T0.W, T8.Y, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: LSHR * T0.W, T8.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T8.Y, T1.W, PV.W,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.Y,
+; EG-NEXT: MOV * T8.X, T4.X,
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i16>, ptr addrspace(1) %in, i32 %tid
%val = load <4 x i16>, ptr addrspace(1) %in.gep, align 16
@@ -570,33 +601,94 @@ define amdgpu_kernel void @v_ctpop_v8i16(ptr addrspace(1) noalias %out, ptr addr
;
; EG-LABEL: v_ctpop_v8i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 3, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 13, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T8.X, 1
+; EG-NEXT: ALU 73, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T12.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
+; EG-NEXT: VTX_READ_128 T12.XYZW, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT: MOV T0.Y, T4.X,
+; EG-NEXT: LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W,
-; EG-NEXT: ALU clause starting at 11:
-; EG-NEXT: LSHR * T0.W, T0.Z, literal.x,
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: LSHR * T0.W, T12.X, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT * T0.W, PV.W,
+; EG-NEXT: LSHL T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T0.W, T12.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV * T0.X, T5.X,
+; EG-NEXT: LSHR * T0.W, T12.Y, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, T0.Z, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.Z, PS,
-; EG-NEXT: LSHR * T1.W, T0.X, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T0.W, T12.Y, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.Y, PS, PV.W,
+; EG-NEXT: MOV T5.X, PV.Y,
+; EG-NEXT: MOV * T0.X, T8.X,
+; EG-NEXT: LSHR * T0.W, T12.Z, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.Y, PV.W,
+; EG-NEXT: BCNT_INT T0.W, PV.W,
; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.X, PV.W,
-; EG-NEXT: LSHR * T8.X, KC0[2].Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T0.W, T12.Z, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV * T0.X, T9.X,
+; EG-NEXT: LSHR * T0.W, T12.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T0.W, T12.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: LSHR T12.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T9.X, PV.W,
+; EG-NEXT: MOV * T0.X, T4.X,
+; EG-NEXT: MOV * T0.Z, T8.X,
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <8 x i16>, ptr addrspace(1) %in, i32 %tid
%val = load <8 x i16>, ptr addrspace(1) %in.gep, align 32
@@ -745,46 +837,174 @@ define amdgpu_kernel void @v_ctpop_v16i16(ptr addrspace(1) noalias %out, ptr add
;
; EG-LABEL: v_ctpop_v16i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 2, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 1 @6
-; EG-NEXT: ALU 25, @13, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T14.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T13.X, 1
+; EG-NEXT: ALU 3, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @8
+; EG-NEXT: ALU 114, @16, KC0[], KC1[]
+; EG-NEXT: ALU 34, @131, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T22.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T21.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T12.XYZW, T0.X, 16, #1
-; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
-; EG-NEXT: ALU clause starting at 10:
-; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_128 T20.XYZW, T0.X, 16, #1
+; EG-NEXT: VTX_READ_128 T21.XYZW, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: MOV T0.Y, T4.X,
+; EG-NEXT: LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: 5(7.006492e-45), 0(0.000000e+00)
; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W,
-; EG-NEXT: ALU clause starting at 13:
-; EG-NEXT: LSHR * T0.W, T12.Z, literal.x,
+; EG-NEXT: ALU clause starting at 16:
+; EG-NEXT: LSHR * T0.W, T20.X, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T12.W, PV.W,
-; EG-NEXT: AND_INT * T0.W, T12.Z, literal.x,
+; EG-NEXT: BCNT_INT * T0.W, PV.W,
+; EG-NEXT: LSHL T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T0.W, T20.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV * T0.X, T5.X,
+; EG-NEXT: LSHR * T0.W, T20.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T0.W, T20.Y, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.Y, PS, PV.W,
+; EG-NEXT: MOV T5.X, PV.Y,
+; EG-NEXT: MOV * T0.X, T8.X,
+; EG-NEXT: LSHR * T0.W, T20.Z, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T12.Z, PS,
-; EG-NEXT: LSHR T0.W, T0.Z, literal.x,
-; EG-NEXT: LSHR * T1.W, T12.X, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T0.W, T20.Z, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV * T0.X, T9.X,
+; EG-NEXT: LSHR * T0.W, T20.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T12.Y, PS,
-; EG-NEXT: AND_INT T0.Z, T0.Z, literal.x,
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T0.W, T20.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, T12.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV T9.X, PV.W,
+; EG-NEXT: MOV * T0.X, T12.X,
+; EG-NEXT: LSHR * T1.W, T21.X, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T1.W, PV.W,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
+; EG-NEXT: MOV * T12.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T1.W, T21.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T1.W, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PS, PV.W,
+; EG-NEXT: MOV T12.X, PV.W,
+; EG-NEXT: MOV * T0.X, T13.X,
+; EG-NEXT: LSHR * T1.W, T21.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T1.W, PV.W,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
+; EG-NEXT: MOV * T13.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T1.W, T21.Y, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T1.W, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T20.Y, PS, PV.W,
+; EG-NEXT: MOV T13.X, PV.Y,
+; EG-NEXT: MOV * T0.X, T16.X,
+; EG-NEXT: LSHR * T1.W, T21.Z, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T1.W, PV.W,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
+; EG-NEXT: ALU clause starting at 131:
+; EG-NEXT: MOV * T16.X, T1.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T1.W, T21.Z, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T1.W, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PS, PV.W,
+; EG-NEXT: MOV T16.X, PV.W,
+; EG-NEXT: MOV * T0.X, T17.X,
+; EG-NEXT: LSHR * T1.W, T21.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T1.W, PV.W,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T12.X, PS,
-; EG-NEXT: BCNT_INT T0.Z, PV.Z,
-; EG-NEXT: LSHR T1.W, T0.X, literal.x,
-; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.x,
+; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T13.X, PS, literal.x,
-; EG-NEXT: BCNT_INT T0.Y, PV.W,
-; EG-NEXT: AND_INT * T1.W, T0.X, literal.y,
-; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT: BCNT_INT T0.X, PV.W,
-; EG-NEXT: LSHR * T14.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
+; EG-NEXT: MOV * T17.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT T1.W, T21.W, literal.x,
+; EG-NEXT: LSHR * T21.X, KC0[2].Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45)
+; EG-NEXT: AND_INT T0.Z, PV.X, literal.x,
+; EG-NEXT: BCNT_INT T1.W, PV.W,
+; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y,
+; EG-NEXT: -65536(nan), 16(2.242078e-44)
+; EG-NEXT: LSHR T22.X, PS, literal.x,
+; EG-NEXT: OR_INT * T20.W, PV.Z, PV.W,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T17.X, PV.W,
+; EG-NEXT: MOV * T0.X, T4.X,
+; EG-NEXT: MOV * T0.Z, T8.X,
+; EG-NEXT: MOV T20.X, T12.X,
+; EG-NEXT: MOV * T20.Z, T16.X, BS:VEC_120/SCL_212
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <16 x i16>, ptr addrspace(1) %in, i32 %tid
%val = load <16 x i16>, ptr addrspace(1) %in.gep, align 32
@@ -1292,7 +1512,7 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB14_4:
-; SI-NEXT: ; implicit-def: $vgpr0
+; SI-NEXT: ; implicit-def: $vgpr0
; SI-NEXT: s_branch .LBB14_2
;
; VI-LABEL: ctpop_i16_in_br:
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
index bad2e603f22e7..a2da8876472ab 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
@@ -1025,67 +1025,74 @@ define amdgpu_kernel void @v3i16_arg(ptr addrspace(1) nocapture %out, <3 x i16>
;
; EG-LABEL: v3i16_arg:
; EG: ; %bb.0: ; %entry
-; EG-NEXT: ALU 0, @10, KC0[], KC1[]
-; EG-NEXT: TEX 1 @6
-; EG-NEXT: ALU 14, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T3.X, 0
-; EG-NEXT: MEM_RAT MSKOR T2.XW, T0.X
+; EG-NEXT: ALU 0, @12, KC0[], KC1[]
+; EG-NEXT: TEX 2 @6
+; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
+; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_16 T1.X, T0.X, 44, #3
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 48, #3
-; EG-NEXT: ALU clause starting at 10:
-; EG-NEXT: MOV * T0.X, 0.0,
-; EG-NEXT: ALU clause starting at 11:
+; EG-NEXT: VTX_READ_16 T6.X, T5.X, 44, #3
+; EG-NEXT: VTX_READ_16 T7.X, T5.X, 46, #3
+; EG-NEXT: VTX_READ_16 T5.X, T5.X, 48, #3
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: MOV * T5.X, 0.0,
+; EG-NEXT: ALU clause starting at 13:
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T2.W, T0.X, literal.y,
+; EG-NEXT: AND_INT * T2.W, T5.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
-; EG-NEXT: LSHL T2.X, T2.W, PV.W,
-; EG-NEXT: LSHL * T2.W, literal.x, PV.W,
+; EG-NEXT: LSHL T5.X, T2.W, PV.W,
+; EG-NEXT: LSHL * T5.W, literal.x, PV.W,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MOV T2.Y, 0.0,
-; EG-NEXT: MOV * T2.Z, 0.0,
-; EG-NEXT: LSHR T0.X, T0.W, literal.x,
-; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
+; EG-NEXT: MOV T5.Y, 0.0,
+; EG-NEXT: MOV * T5.Z, 0.0,
+; EG-NEXT: LSHR T8.X, T0.W, literal.x,
+; EG-NEXT: LSHL T0.W, T7.X, literal.y,
+; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
+; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT T6.X, PV.W, PS,
+; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: v3i16_arg:
; CM: ; %bb.0: ; %entry
; CM-NEXT: ALU 0, @12, KC0[], KC1[]
-; CM-NEXT: TEX 0 @8
-; CM-NEXT: ALU 13, @13, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT MSKOR T1.XW, T2.X
-; CM-NEXT: ALU 1, @27, KC0[CB0:0-32], KC1[]
-; CM-NEXT: TEX 0 @10
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: TEX 2 @6
+; CM-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT MSKOR T5.XW, T8.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6.X, T7.X
; CM-NEXT: CF_END
-; CM-NEXT: Fetch clause starting at 8:
-; CM-NEXT: VTX_READ_16 T1.X, T0.X, 48, #3
-; CM-NEXT: Fetch clause starting at 10:
-; CM-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3
+; CM-NEXT: Fetch clause starting at 6:
+; CM-NEXT: VTX_READ_16 T6.X, T5.X, 44, #3
+; CM-NEXT: VTX_READ_16 T7.X, T5.X, 46, #3
+; CM-NEXT: VTX_READ_16 T5.X, T5.X, 48, #3
; CM-NEXT: ALU clause starting at 12:
-; CM-NEXT: MOV * T0.X, 0.0,
+; CM-NEXT: MOV * T5.X, 0.0,
; CM-NEXT: ALU clause starting at 13:
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; CM-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; CM-NEXT: AND_INT * T1.W, PV.W, literal.x,
; CM-NEXT: 3(4.203895e-45), 0(0.000000e+00)
-; CM-NEXT: AND_INT T0.Z, T1.X, literal.x,
+; CM-NEXT: AND_INT T0.Z, T5.X, literal.x,
; CM-NEXT: LSHL * T1.W, PV.W, literal.y,
; CM-NEXT: 65535(9.183409e-41), 3(4.203895e-45)
-; CM-NEXT: LSHL T1.X, PV.Z, PV.W,
-; CM-NEXT: LSHL * T1.W, literal.x, PV.W,
+; CM-NEXT: LSHL T5.X, PV.Z, PV.W,
+; CM-NEXT: LSHL * T5.W, literal.x, PV.W,
; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; CM-NEXT: MOV T1.Y, 0.0,
-; CM-NEXT: MOV * T1.Z, 0.0,
-; CM-NEXT: LSHR * T2.X, T0.W, literal.x,
+; CM-NEXT: MOV T5.Y, 0.0,
+; CM-NEXT: MOV * T5.Z, 0.0,
+; CM-NEXT: LSHL T0.Z, T7.X, literal.x,
+; CM-NEXT: AND_INT * T1.W, T6.X, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T6.X, PV.Z, PV.W,
+; CM-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: ALU clause starting at 27:
-; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: LSHR * T8.X, T0.W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
store <3 x i16> %in, ptr addrspace(1) %out, align 4
@@ -2669,47 +2676,205 @@ define amdgpu_kernel void @v8i16_arg(ptr addrspace(1) %out, <8 x i16> %in) {
;
; EG-LABEL: v8i16_arg:
; EG: ; %bb.0: ; %entry
-; EG-NEXT: ALU 0, @14, KC0[], KC1[]
-; EG-NEXT: TEX 3 @6
-; EG-NEXT: ALU 4, @15, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
+; EG-NEXT: ALU 1, @36, KC0[], KC1[]
+; EG-NEXT: TEX 0 @20
+; EG-NEXT: ALU 5, @38, KC0[], KC1[]
+; EG-NEXT: TEX 0 @22
+; EG-NEXT: ALU 5, @44, KC0[], KC1[]
+; EG-NEXT: TEX 0 @24
+; EG-NEXT: ALU 5, @50, KC0[], KC1[]
+; EG-NEXT: TEX 0 @26
+; EG-NEXT: ALU 5, @56, KC0[], KC1[]
+; EG-NEXT: TEX 0 @28
+; EG-NEXT: ALU 5, @62, KC0[], KC1[]
+; EG-NEXT: TEX 0 @30
+; EG-NEXT: ALU 5, @68, KC0[], KC1[]
+; EG-NEXT: TEX 0 @32
+; EG-NEXT: ALU 5, @74, KC0[], KC1[]
+; EG-NEXT: TEX 0 @34
+; EG-NEXT: ALU 8, @80, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
-; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_16 T1.X, T0.X, 52, #3
-; EG-NEXT: VTX_READ_16 T2.X, T0.X, 54, #3
-; EG-NEXT: VTX_READ_16 T3.X, T0.X, 62, #3
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 60, #3
-; EG-NEXT: ALU clause starting at 14:
-; EG-NEXT: MOV * T0.X, 0.0,
-; EG-NEXT: ALU clause starting at 15:
-; EG-NEXT: MOV T1.Y, T2.X,
-; EG-NEXT: MOV * T1.Z, T0.X, BS:VEC_120/SCL_212
-; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
-; EG-NEXT: MOV * T1.W, T3.X,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: Fetch clause starting at 20:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 66, #3
+; EG-NEXT: Fetch clause starting at 22:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 58, #3
+; EG-NEXT: Fetch clause starting at 24:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 64, #3
+; EG-NEXT: Fetch clause starting at 26:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 56, #3
+; EG-NEXT: Fetch clause starting at 28:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 62, #3
+; EG-NEXT: Fetch clause starting at 30:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 54, #3
+; EG-NEXT: Fetch clause starting at 32:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 60, #3
+; EG-NEXT: Fetch clause starting at 34:
+; EG-NEXT: VTX_READ_16 T7.X, T7.X, 52, #3
+; EG-NEXT: ALU clause starting at 36:
+; EG-NEXT: MOV * T0.Y, T3.X,
+; EG-NEXT: MOV * T7.X, 0.0,
+; EG-NEXT: ALU clause starting at 38:
+; EG-NEXT: LSHL T0.W, T8.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV T3.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T5.X,
+; EG-NEXT: ALU clause starting at 44:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T8.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T3.X,
+; EG-NEXT: ALU clause starting at 50:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T8.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T3.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T5.X,
+; EG-NEXT: ALU clause starting at 56:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T8.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T2.X,
+; EG-NEXT: ALU clause starting at 62:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T8.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T2.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: ALU clause starting at 68:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T8.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T2.X,
+; EG-NEXT: ALU clause starting at 74:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T8.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T7.Z, PV.W, PS,
+; EG-NEXT: MOV T2.X, PV.Z,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: ALU clause starting at 80:
+; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.y,
+; EG-NEXT: AND_INT * T1.W, T7.X, literal.z,
+; EG-NEXT: 2(2.802597e-45), -65536(nan)
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T7.X, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.X,
+; EG-NEXT: MOV * T7.W, T3.X,
+; EG-NEXT: MOV * T7.Y, T5.X,
;
; CM-LABEL: v8i16_arg:
; CM: ; %bb.0: ; %entry
-; CM-NEXT: ALU 0, @14, KC0[], KC1[]
-; CM-NEXT: TEX 3 @6
-; CM-NEXT: ALU 4, @15, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
+; CM-NEXT: ALU 1, @36, KC0[], KC1[]
+; CM-NEXT: TEX 0 @20
+; CM-NEXT: ALU 5, @38, KC0[], KC1[]
+; CM-NEXT: TEX 0 @22
+; CM-NEXT: ALU 5, @44, KC0[], KC1[]
+; CM-NEXT: TEX 0 @24
+; CM-NEXT: ALU 5, @50, KC0[], KC1[]
+; CM-NEXT: TEX 0 @26
+; CM-NEXT: ALU 5, @56, KC0[], KC1[]
+; CM-NEXT: TEX 0 @28
+; CM-NEXT: ALU 5, @62, KC0[], KC1[]
+; CM-NEXT: TEX 0 @30
+; CM-NEXT: ALU 5, @68, KC0[], KC1[]
+; CM-NEXT: TEX 0 @32
+; CM-NEXT: ALU 5, @74, KC0[], KC1[]
+; CM-NEXT: TEX 0 @34
+; CM-NEXT: ALU 8, @80, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T8.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
-; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_16 T1.X, T0.X, 52, #3
-; CM-NEXT: VTX_READ_16 T2.X, T0.X, 54, #3
-; CM-NEXT: VTX_READ_16 T3.X, T0.X, 62, #3
-; CM-NEXT: VTX_READ_16 T0.X, T0.X, 60, #3
-; CM-NEXT: ALU clause starting at 14:
-; CM-NEXT: MOV * T0.X, 0.0,
-; CM-NEXT: ALU clause starting at 15:
-; CM-NEXT: MOV T1.Y, T2.X,
-; CM-NEXT: MOV * T1.Z, T0.X, BS:VEC_120/SCL_212
-; CM-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
-; CM-NEXT: MOV * T1.W, T3.X,
-; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: Fetch clause starting at 20:
+; CM-NEXT: VTX_READ_16 T8.X, T7.X, 66, #3
+; CM-NEXT: Fetch clause starting at 22:
+; CM-NEXT: VTX_READ_16 T8.X, T7.X, 58, #3
+; CM-NEXT: Fetch clause starting at 24:
+; CM-NEXT: VTX_READ_16 T8.X, T7.X, 64, #3
+; CM-NEXT: Fetch clause starting at 26:
+; CM-NEXT: VTX_READ_16 T8.X, T7.X, 56, #3
+; CM-NEXT: Fetch clause starting at 28:
+; CM-NEXT: VTX_READ_16 T8.X, T7.X, 62, #3
+; CM-NEXT: Fetch clause starting at 30:
+; CM-NEXT: VTX_READ_16 T8.X, T7.X, 54, #3
+; CM-NEXT: Fetch clause starting at 32:
+; CM-NEXT: VTX_READ_16 T8.X, T7.X, 60, #3
+; CM-NEXT: Fetch clause starting at 34:
+; CM-NEXT: VTX_READ_16 T7.X, T7.X, 52, #3
+; CM-NEXT: ALU clause starting at 36:
+; CM-NEXT: MOV * T0.Y, T3.X,
+; CM-NEXT: MOV * T7.X, 0.0,
+; CM-NEXT: ALU clause starting at 38:
+; CM-NEXT: LSHL T0.Z, T8.X, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV T3.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T5.X,
+; CM-NEXT: ALU clause starting at 44:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T8.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T5.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T3.X,
+; CM-NEXT: ALU clause starting at 50:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T8.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T3.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T5.X,
+; CM-NEXT: ALU clause starting at 56:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T8.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T5.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T2.X,
+; CM-NEXT: ALU clause starting at 62:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T8.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T2.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T4.X,
+; CM-NEXT: ALU clause starting at 68:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T8.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T2.X,
+; CM-NEXT: ALU clause starting at 74:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T8.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T7.Z, PV.Z, PV.W,
+; CM-NEXT: MOV T2.X, PV.Z,
+; CM-NEXT: MOV * T0.Y, T4.X,
+; CM-NEXT: ALU clause starting at 80:
+; CM-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.y,
+; CM-NEXT: AND_INT * T0.W, T7.X, literal.z,
+; CM-NEXT: 2(2.802597e-45), -65536(nan)
+; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; CM-NEXT: OR_INT * T7.X, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.X,
+; CM-NEXT: MOV * T7.W, T3.X,
+; CM-NEXT: MOV * T7.Y, T5.X,
entry:
store <8 x i16> %in, ptr addrspace(1) %out
ret void
@@ -3453,68 +3618,392 @@ define amdgpu_kernel void @v16i16_arg(ptr addrspace(1) %out, <16 x i16> %in) {
;
; EG-LABEL: v16i16_arg:
; EG: ; %bb.0: ; %entry
-; EG-NEXT: ALU 0, @22, KC0[], KC1[]
-; EG-NEXT: TEX 7 @6
-; EG-NEXT: ALU 10, @23, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T0.X, 1
+; EG-NEXT: ALU 1, @68, KC0[], KC1[]
+; EG-NEXT: TEX 0 @36
+; EG-NEXT: ALU 5, @70, KC0[], KC1[]
+; EG-NEXT: TEX 0 @38
+; EG-NEXT: ALU 5, @76, KC0[], KC1[]
+; EG-NEXT: TEX 0 @40
+; EG-NEXT: ALU 5, @82, KC0[], KC1[]
+; EG-NEXT: TEX 0 @42
+; EG-NEXT: ALU 5, @88, KC0[], KC1[]
+; EG-NEXT: TEX 0 @44
+; EG-NEXT: ALU 5, @94, KC0[], KC1[]
+; EG-NEXT: TEX 0 @46
+; EG-NEXT: ALU 5, @100, KC0[], KC1[]
+; EG-NEXT: TEX 0 @48
+; EG-NEXT: ALU 5, @106, KC0[], KC1[]
+; EG-NEXT: TEX 0 @50
+; EG-NEXT: ALU 5, @112, KC0[], KC1[]
+; EG-NEXT: TEX 0 @52
+; EG-NEXT: ALU 5, @118, KC0[], KC1[]
+; EG-NEXT: TEX 0 @54
+; EG-NEXT: ALU 5, @124, KC0[], KC1[]
+; EG-NEXT: TEX 0 @56
+; EG-NEXT: ALU 5, @130, KC0[], KC1[]
+; EG-NEXT: TEX 0 @58
+; EG-NEXT: ALU 5, @136, KC0[], KC1[]
+; EG-NEXT: TEX 0 @60
+; EG-NEXT: ALU 5, @142, KC0[], KC1[]
+; EG-NEXT: TEX 0 @62
+; EG-NEXT: ALU 5, @148, KC0[], KC1[]
+; EG-NEXT: TEX 0 @64
+; EG-NEXT: ALU 5, @154, KC0[], KC1[]
+; EG-NEXT: TEX 0 @66
+; EG-NEXT: ALU 13, @160, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T14.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T13.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_16 T1.X, T0.X, 84, #3
-; EG-NEXT: VTX_READ_16 T2.X, T0.X, 86, #3
-; EG-NEXT: VTX_READ_16 T3.X, T0.X, 94, #3
-; EG-NEXT: VTX_READ_16 T4.X, T0.X, 78, #3
-; EG-NEXT: VTX_READ_16 T5.X, T0.X, 76, #3
-; EG-NEXT: VTX_READ_16 T6.X, T0.X, 92, #3
-; EG-NEXT: VTX_READ_16 T7.X, T0.X, 68, #3
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 70, #3
-; EG-NEXT: ALU clause starting at 22:
-; EG-NEXT: MOV * T0.X, 0.0,
-; EG-NEXT: ALU clause starting at 23:
-; EG-NEXT: MOV T1.Y, T2.X,
-; EG-NEXT: MOV * T7.Y, T0.X,
-; EG-NEXT: MOV * T1.Z, T6.X,
-; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
-; EG-NEXT: MOV T7.Z, T5.X,
+; EG-NEXT: Fetch clause starting at 36:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 98, #3
+; EG-NEXT: Fetch clause starting at 38:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 90, #3
+; EG-NEXT: Fetch clause starting at 40:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 82, #3
+; EG-NEXT: Fetch clause starting at 42:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 74, #3
+; EG-NEXT: Fetch clause starting at 44:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 96, #3
+; EG-NEXT: Fetch clause starting at 46:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 88, #3
+; EG-NEXT: Fetch clause starting at 48:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 80, #3
+; EG-NEXT: Fetch clause starting at 50:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 72, #3
+; EG-NEXT: Fetch clause starting at 52:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 94, #3
+; EG-NEXT: Fetch clause starting at 54:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 86, #3
+; EG-NEXT: Fetch clause starting at 56:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 78, #3
+; EG-NEXT: Fetch clause starting at 58:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 70, #3
+; EG-NEXT: Fetch clause starting at 60:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 92, #3
+; EG-NEXT: Fetch clause starting at 62:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 84, #3
+; EG-NEXT: Fetch clause starting at 64:
+; EG-NEXT: VTX_READ_16 T13.X, T11.X, 76, #3
+; EG-NEXT: Fetch clause starting at 66:
+; EG-NEXT: VTX_READ_16 T11.X, T11.X, 68, #3
+; EG-NEXT: ALU clause starting at 68:
+; EG-NEXT: MOV * T0.Y, T3.X,
+; EG-NEXT: MOV * T11.X, 0.0,
+; EG-NEXT: ALU clause starting at 70:
+; EG-NEXT: LSHL T0.W, T12.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV T3.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T5.X,
+; EG-NEXT: ALU clause starting at 76:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T7.X,
+; EG-NEXT: ALU clause starting at 82:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T7.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T9.X,
+; EG-NEXT: ALU clause starting at 88:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T3.X,
+; EG-NEXT: ALU clause starting at 94:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T3.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T5.X,
+; EG-NEXT: ALU clause starting at 100:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T7.X,
+; EG-NEXT: ALU clause starting at 106:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T7.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T9.X,
+; EG-NEXT: ALU clause starting at 112:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T2.X,
+; EG-NEXT: ALU clause starting at 118:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T2.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: ALU clause starting at 124:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T6.X,
+; EG-NEXT: ALU clause starting at 130:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T6.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: ALU clause starting at 136:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T2.X,
+; EG-NEXT: ALU clause starting at 142:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T12.Z, PV.W, PS,
+; EG-NEXT: MOV T2.X, PV.Z,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: ALU clause starting at 148:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T12.X, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.X,
+; EG-NEXT: MOV * T0.Y, T6.X,
+; EG-NEXT: ALU clause starting at 154:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T13.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T11.Z, PV.W, PS,
+; EG-NEXT: MOV T6.X, PV.Z,
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: ALU clause starting at 160:
+; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
-; EG-NEXT: LSHR T2.X, PV.W, literal.x,
-; EG-NEXT: MOV T7.W, T4.X,
-; EG-NEXT: MOV * T1.W, T3.X,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: LSHR T14.X, PV.W, literal.x,
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.y,
+; EG-NEXT: AND_INT * T1.W, T11.X, literal.z,
+; EG-NEXT: 2(2.802597e-45), -65536(nan)
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T11.X, PV.W, PS,
+; EG-NEXT: MOV T8.X, PV.X,
+; EG-NEXT: MOV * T12.W, T3.X,
+; EG-NEXT: MOV T12.Y, T5.X,
+; EG-NEXT: MOV T11.W, T7.X, BS:VEC_120/SCL_212
+; EG-NEXT: MOV * T11.Y, T9.X,
;
; CM-LABEL: v16i16_arg:
; CM: ; %bb.0: ; %entry
-; CM-NEXT: ALU 0, @22, KC0[], KC1[]
-; CM-NEXT: TEX 7 @6
-; CM-NEXT: ALU 11, @23, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T2.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
+; CM-NEXT: ALU 1, @68, KC0[], KC1[]
+; CM-NEXT: TEX 0 @36
+; CM-NEXT: ALU 5, @70, KC0[], KC1[]
+; CM-NEXT: TEX 0 @38
+; CM-NEXT: ALU 5, @76, KC0[], KC1[]
+; CM-NEXT: TEX 0 @40
+; CM-NEXT: ALU 5, @82, KC0[], KC1[]
+; CM-NEXT: TEX 0 @42
+; CM-NEXT: ALU 5, @88, KC0[], KC1[]
+; CM-NEXT: TEX 0 @44
+; CM-NEXT: ALU 5, @94, KC0[], KC1[]
+; CM-NEXT: TEX 0 @46
+; CM-NEXT: ALU 5, @100, KC0[], KC1[]
+; CM-NEXT: TEX 0 @48
+; CM-NEXT: ALU 5, @106, KC0[], KC1[]
+; CM-NEXT: TEX 0 @50
+; CM-NEXT: ALU 5, @112, KC0[], KC1[]
+; CM-NEXT: TEX 0 @52
+; CM-NEXT: ALU 5, @118, KC0[], KC1[]
+; CM-NEXT: TEX 0 @54
+; CM-NEXT: ALU 5, @124, KC0[], KC1[]
+; CM-NEXT: TEX 0 @56
+; CM-NEXT: ALU 5, @130, KC0[], KC1[]
+; CM-NEXT: TEX 0 @58
+; CM-NEXT: ALU 5, @136, KC0[], KC1[]
+; CM-NEXT: TEX 0 @60
+; CM-NEXT: ALU 5, @142, KC0[], KC1[]
+; CM-NEXT: TEX 0 @62
+; CM-NEXT: ALU 5, @148, KC0[], KC1[]
+; CM-NEXT: TEX 0 @64
+; CM-NEXT: ALU 5, @154, KC0[], KC1[]
+; CM-NEXT: TEX 0 @66
+; CM-NEXT: ALU 14, @160, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T11, T14.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T13.X
; CM-NEXT: CF_END
-; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_16 T1.X, T0.X, 84, #3
-; CM-NEXT: VTX_READ_16 T2.X, T0.X, 86, #3
-; CM-NEXT: VTX_READ_16 T3.X, T0.X, 78, #3
-; CM-NEXT: VTX_READ_16 T4.X, T0.X, 94, #3
-; CM-NEXT: VTX_READ_16 T5.X, T0.X, 76, #3
-; CM-NEXT: VTX_READ_16 T6.X, T0.X, 92, #3
-; CM-NEXT: VTX_READ_16 T7.X, T0.X, 68, #3
-; CM-NEXT: VTX_READ_16 T0.X, T0.X, 70, #3
-; CM-NEXT: ALU clause starting at 22:
-; CM-NEXT: MOV * T0.X, 0.0,
-; CM-NEXT: ALU clause starting at 23:
-; CM-NEXT: MOV * T1.Y, T2.X,
-; CM-NEXT: MOV T7.Y, T0.X,
-; CM-NEXT: MOV T1.Z, T6.X, BS:VEC_120/SCL_212
+; CM-NEXT: Fetch clause starting at 36:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 98, #3
+; CM-NEXT: Fetch clause starting at 38:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 90, #3
+; CM-NEXT: Fetch clause starting at 40:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 82, #3
+; CM-NEXT: Fetch clause starting at 42:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 74, #3
+; CM-NEXT: Fetch clause starting at 44:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 96, #3
+; CM-NEXT: Fetch clause starting at 46:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 88, #3
+; CM-NEXT: Fetch clause starting at 48:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 80, #3
+; CM-NEXT: Fetch clause starting at 50:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 72, #3
+; CM-NEXT: Fetch clause starting at 52:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 94, #3
+; CM-NEXT: Fetch clause starting at 54:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 86, #3
+; CM-NEXT: Fetch clause starting at 56:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 78, #3
+; CM-NEXT: Fetch clause starting at 58:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 70, #3
+; CM-NEXT: Fetch clause starting at 60:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 92, #3
+; CM-NEXT: Fetch clause starting at 62:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 84, #3
+; CM-NEXT: Fetch clause starting at 64:
+; CM-NEXT: VTX_READ_16 T13.X, T11.X, 76, #3
+; CM-NEXT: Fetch clause starting at 66:
+; CM-NEXT: VTX_READ_16 T11.X, T11.X, 68, #3
+; CM-NEXT: ALU clause starting at 68:
+; CM-NEXT: MOV * T0.Y, T3.X,
+; CM-NEXT: MOV * T11.X, 0.0,
+; CM-NEXT: ALU clause starting at 70:
+; CM-NEXT: LSHL T0.Z, T12.X, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV T3.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T5.X,
+; CM-NEXT: ALU clause starting at 76:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T5.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T7.X,
+; CM-NEXT: ALU clause starting at 82:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T7.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T9.X,
+; CM-NEXT: ALU clause starting at 88:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T9.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T3.X,
+; CM-NEXT: ALU clause starting at 94:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T3.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T5.X,
+; CM-NEXT: ALU clause starting at 100:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T5.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T7.X,
+; CM-NEXT: ALU clause starting at 106:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T7.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T9.X,
+; CM-NEXT: ALU clause starting at 112:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T9.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T2.X,
+; CM-NEXT: ALU clause starting at 118:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T2.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T4.X,
+; CM-NEXT: ALU clause starting at 124:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T6.X,
+; CM-NEXT: ALU clause starting at 130:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T6.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T8.X,
+; CM-NEXT: ALU clause starting at 136:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T8.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T2.X,
+; CM-NEXT: ALU clause starting at 142:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T12.Z, PV.Z, PV.W,
+; CM-NEXT: MOV T2.X, PV.Z,
+; CM-NEXT: MOV * T0.Y, T4.X,
+; CM-NEXT: ALU clause starting at 148:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T12.X, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.X,
+; CM-NEXT: MOV * T0.Y, T6.X,
+; CM-NEXT: ALU clause starting at 154:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T13.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T11.Z, PV.Z, PV.W,
+; CM-NEXT: MOV T6.X, PV.Z,
+; CM-NEXT: MOV * T0.Y, T8.X,
+; CM-NEXT: ALU clause starting at 160:
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T0.X, PV.W, literal.x,
-; CM-NEXT: MOV T7.Z, T5.X,
-; CM-NEXT: MOV * T1.W, T4.X, BS:VEC_120/SCL_212
-; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: LSHR T2.X, KC0[2].Y, literal.x,
-; CM-NEXT: MOV * T7.W, T3.X,
+; CM-NEXT: LSHR * T13.X, PV.W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: LSHR T14.X, KC0[2].Y, literal.x,
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.y,
+; CM-NEXT: AND_INT * T0.W, T11.X, literal.z,
+; CM-NEXT: 2(2.802597e-45), -65536(nan)
+; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; CM-NEXT: OR_INT * T11.X, PV.Z, PV.W,
+; CM-NEXT: MOV T8.X, PV.X,
+; CM-NEXT: MOV * T12.W, T3.X,
+; CM-NEXT: MOV T12.Y, T5.X,
+; CM-NEXT: MOV * T11.W, T7.X, BS:VEC_120/SCL_212
+; CM-NEXT: MOV * T11.Y, T9.X,
entry:
store <16 x i16> %in, ptr addrspace(1) %out
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
index 4491c4b766db9..8c8dd83c7a4bf 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
@@ -232,32 +232,38 @@ define amdgpu_kernel void @constant_load_v3i16(ptr addrspace(1) %out, ptr addrsp
;
; EG-LABEL: constant_load_v3i16:
; EG: ; %bb.0: ; %entry
-; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 1 @6
-; EG-NEXT: ALU 14, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T3.X, 0
-; EG-NEXT: MEM_RAT MSKOR T2.XW, T0.X
+; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 2 @6
+; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
+; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_16 T1.X, T0.X, 0, #1
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 4, #1
-; EG-NEXT: ALU clause starting at 10:
-; EG-NEXT: MOV * T0.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 11:
+; EG-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1
+; EG-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1
+; EG-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: MOV * T5.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 13:
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T2.W, T0.X, literal.y,
+; EG-NEXT: AND_INT * T2.W, T5.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
-; EG-NEXT: LSHL T2.X, T2.W, PV.W,
-; EG-NEXT: LSHL * T2.W, literal.x, PV.W,
+; EG-NEXT: LSHL T5.X, T2.W, PV.W,
+; EG-NEXT: LSHL * T5.W, literal.x, PV.W,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: MOV T5.Y, 0.0,
+; EG-NEXT: MOV * T5.Z, 0.0,
+; EG-NEXT: LSHR T8.X, T0.W, literal.x,
+; EG-NEXT: LSHL T0.W, T7.X, literal.y,
+; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
+; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MOV T2.Y, 0.0,
-; EG-NEXT: MOV * T2.Z, 0.0,
-; EG-NEXT: LSHR T0.X, T0.W, literal.x,
-; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT T6.X, PV.W, PS,
+; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v3i16:
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
index b39b38a420233..5c4bc95578bb4 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
@@ -9832,24 +9832,50 @@ define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(ptr addrspace(1) %out
;
; EG-LABEL: constant_zextload_v4i8_to_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 6, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1
+; EG-NEXT: ALU 31, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XY, T7.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
+; EG-NEXT: VTX_READ_32 T7.X, T7.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T4.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: MOV * T7.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: AND_INT T0.W, T7.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 255(3.573311e-43), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T0.W, T7.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT * T4.Y, T4.X, literal.x, PV.W,
+; EG-NEXT: BFE_UINT T0.W, T7.X, literal.x, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T4.X, T4.X, literal.x,
-; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y,
-; EG-NEXT: 255(3.573311e-43), 2(2.802597e-45)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T8.Y, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.Y,
+; EG-NEXT: MOV * T8.X, T4.X,
;
; GFX12-LABEL: constant_zextload_v4i8_to_v4i16:
; GFX12: ; %bb.0:
@@ -9951,23 +9977,56 @@ define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(ptr addrspace(1) %out
;
; EG-LABEL: constant_sextload_v4i8_to_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1
+; EG-NEXT: ALU 37, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XY, T7.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
+; EG-NEXT: VTX_READ_32 T7.X, T7.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T4.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
-; EG-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x,
-; EG-NEXT: LSHR T0.W, T4.X, literal.x,
-; EG-NEXT: LSHR * T4.X, KC0[2].Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 2(2.802597e-45)
-; EG-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.x,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: MOV * T7.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: LSHR * T0.W, T7.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T7.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T8.Y, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.Y,
+; EG-NEXT: MOV * T8.X, T4.X,
;
; GFX12-LABEL: constant_sextload_v4i8_to_v4i16:
; GFX12: ; %bb.0:
@@ -10088,27 +10147,80 @@ define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(ptr addrspace(1) %out
;
; EG-LABEL: constant_zextload_v8i8_to_v8i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 9, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1
+; EG-NEXT: ALU 61, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T11.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
+; EG-NEXT: VTX_READ_64 T11.XY, T11.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T5.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: MOV * T11.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: AND_INT T0.W, T11.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 255(3.573311e-43), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T0.W, T11.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT * T6.W, T5.Y, literal.x, PV.W,
+; EG-NEXT: BFE_UINT T1.W, T11.X, literal.x, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), -65536(nan)
+; EG-NEXT: OR_INT * T1.W, PS, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T11.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T12.Y, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T11.Y, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T11.Y, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T6.Y, T5.X, literal.x, T0.W,
-; EG-NEXT: AND_INT * T6.Z, T5.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 255(3.573311e-43)
-; EG-NEXT: AND_INT T6.X, T5.X, literal.x,
-; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y,
-; EG-NEXT: 255(3.573311e-43), 2(2.802597e-45)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: BFE_UINT * T0.W, T11.Y, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PV.W, T0.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T12.W, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T12.X, T8.X,
+; EG-NEXT: MOV * T12.Z, T4.X,
;
; GFX12-LABEL: constant_zextload_v8i8_to_v8i16:
; GFX12: ; %bb.0:
@@ -10255,28 +10367,93 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(ptr addrspace(1) %out
;
; EG-LABEL: constant_sextload_v8i8_to_v8i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1
+; EG-NEXT: ALU 74, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T11.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
+; EG-NEXT: VTX_READ_64 T11.XY, T11.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T5.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
-; EG-NEXT: BFE_INT * T6.Z, T5.Y, 0.0, literal.x,
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: MOV * T11.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: BFE_INT * T0.W, T11.X, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T6.X, T5.X, 0.0, literal.x,
-; EG-NEXT: LSHR * T0.W, T5.Y, literal.x,
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T11.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T6.W, PV.W, 0.0, literal.x,
-; EG-NEXT: LSHR * T0.W, T5.X, literal.x,
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: LSHR * T0.W, T11.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T11.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T12.Y, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T4.X,
+; EG-NEXT: BFE_INT * T0.W, T11.Y, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T5.X, KC0[2].Y, literal.x,
-; EG-NEXT: BFE_INT * T6.Y, PS, 0.0, literal.y,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T11.Y, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T12.W, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T12.X, T8.X,
+; EG-NEXT: MOV * T12.Z, T4.X,
;
; GFX12-LABEL: constant_sextload_v8i8_to_v8i16:
; GFX12: ; %bb.0:
@@ -10472,37 +10649,146 @@ define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(ptr addrspace(1) %o
;
; EG-LABEL: constant_zextload_v16i8_to_v16i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
+; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @8
+; EG-NEXT: ALU 103, @12, KC0[], KC1[]
+; EG-NEXT: ALU 20, @116, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T22.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T21.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
-; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T7.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.Y, T16.X,
+; EG-NEXT: MOV * T19.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: AND_INT T0.W, T19.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 255(3.573311e-43), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T0.W, T19.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, T17.X,
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT * T8.W, T7.Y, literal.x, PV.W,
+; EG-NEXT: BFE_UINT T1.W, T19.X, literal.x, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), -65536(nan)
+; EG-NEXT: OR_INT * T1.W, PS, PV.W,
+; EG-NEXT: MOV * T17.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T19.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T8.Y, T7.X, literal.x, T0.W,
-; EG-NEXT: AND_INT T8.Z, T7.Y, literal.y,
-; EG-NEXT: BFE_UINT * T9.W, T7.W, literal.x, T0.W,
-; EG-NEXT: 8(1.121039e-44), 255(3.573311e-43)
-; EG-NEXT: AND_INT T8.X, T7.X, literal.x,
-; EG-NEXT: BFE_UINT T9.Y, T7.Z, literal.y, T0.W,
-; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.z,
-; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: AND_INT * T9.Z, T7.W, literal.x,
-; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
-; EG-NEXT: AND_INT T9.X, T7.Z, literal.x,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT: 255(3.573311e-43), 16(2.242078e-44)
-; EG-NEXT: LSHR * T10.X, PV.W, literal.x,
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T20.Y, PV.W, PS,
+; EG-NEXT: MOV T17.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T12.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T19.Y, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T19.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, T13.X,
+; EG-NEXT: BFE_UINT * T1.W, T19.Y, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T19.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T20.W, PV.W, PS,
+; EG-NEXT: MOV T13.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T19.Z, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T19.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: BFE_UINT * T1.W, T19.Z, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T19.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T19.Y, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T19.W, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T19.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: BFE_UINT * T0.W, T19.W, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: ALU clause starting at 116:
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PV.W, T0.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR T0.W, T19.W, literal.x,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44)
+; EG-NEXT: LSHR T21.X, PS, literal.x,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.y,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.z,
+; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT: 16711680(2.341805e-38), 0(0.000000e+00)
+; EG-NEXT: LSHR T22.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T19.W, PV.W, PS,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T20.X, T16.X,
+; EG-NEXT: MOV * T20.Z, T12.X,
+; EG-NEXT: MOV T19.X, T8.X,
+; EG-NEXT: MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
;
; GFX12-LABEL: constant_zextload_v16i8_to_v16i16:
; GFX12: ; %bb.0:
@@ -10753,38 +11039,173 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(ptr addrspace(1) %o
;
; EG-LABEL: constant_sextload_v16i8_to_v16i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 20, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
+; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @8
+; EG-NEXT: ALU 104, @12, KC0[], KC1[]
+; EG-NEXT: ALU 46, @117, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T22.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T21.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
-; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T7.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
-; EG-NEXT: BFE_INT * T8.Z, T7.Y, 0.0, literal.x,
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.Y, T16.X,
+; EG-NEXT: MOV * T19.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: BFE_INT * T0.W, T19.X, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T19.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T8.X, T7.X, 0.0, literal.x,
-; EG-NEXT: BFE_INT T9.Z, T7.W, 0.0, literal.x,
-; EG-NEXT: LSHR * T0.W, T7.Y, literal.x,
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, T17.X,
+; EG-NEXT: LSHR * T0.W, T19.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T17.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T19.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T20.Y, PV.W, PS,
+; EG-NEXT: MOV T17.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T12.X,
+; EG-NEXT: BFE_INT * T0.W, T19.Y, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T9.X, T7.Z, 0.0, literal.x,
-; EG-NEXT: LSHR T0.Z, T7.W, literal.x,
-; EG-NEXT: BFE_INT T8.W, PV.W, 0.0, literal.x,
-; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T19.Y, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
-; EG-NEXT: BFE_INT T8.Y, PS, 0.0, literal.y,
-; EG-NEXT: LSHR T1.Z, T7.Z, literal.y,
-; EG-NEXT: BFE_INT T9.W, PV.Z, 0.0, literal.y,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T10.X, PS, literal.x,
-; EG-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, T13.X,
+; EG-NEXT: LSHR * T0.W, T19.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T19.Y, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T20.W, PV.W, PS,
+; EG-NEXT: MOV T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, T8.X,
+; EG-NEXT: BFE_INT * T0.W, T19.Z, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T19.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: LSHR * T0.W, T19.Z, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T19.Z, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: ALU clause starting at 117:
+; EG-NEXT: OR_INT * T19.Y, T1.W, T0.W,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T4.X,
+; EG-NEXT: BFE_INT * T0.W, T19.W, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T19.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: LSHR * T0.W, T19.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR T0.W, T19.W, literal.x,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT: 24(3.363116e-44), 16(2.242078e-44)
+; EG-NEXT: LSHR T21.X, PS, literal.x,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.y,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.z,
+; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: LSHR T22.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T19.W, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T20.X, T16.X,
+; EG-NEXT: MOV * T20.Z, T12.X,
+; EG-NEXT: MOV T19.X, T8.X,
+; EG-NEXT: MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
;
; GFX12-LABEL: constant_sextload_v16i8_to_v16i16:
; GFX12: ; %bb.0:
@@ -11132,58 +11553,276 @@ define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(ptr addrspace(1) %o
;
; EG-LABEL: constant_zextload_v32i8_to_v32i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 1 @8
-; EG-NEXT: ALU 37, @13, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T12.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 1
+; EG-NEXT: ALU 1, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @10
+; EG-NEXT: ALU 103, @16, KC0[], KC1[]
+; EG-NEXT: ALU 104, @120, KC0[], KC1[]
+; EG-NEXT: ALU 41, @225, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T42.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 8:
-; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
-; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
-; EG-NEXT: ALU clause starting at 12:
-; EG-NEXT: MOV * T11.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 13:
+; EG-NEXT: Fetch clause starting at 10:
+; EG-NEXT: VTX_READ_128 T37.XYZW, T35.X, 16, #1
+; EG-NEXT: VTX_READ_128 T35.XYZW, T35.X, 0, #1
+; EG-NEXT: ALU clause starting at 14:
+; EG-NEXT: MOV * T0.Y, T16.X,
+; EG-NEXT: MOV * T35.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 16:
+; EG-NEXT: AND_INT T0.W, T37.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 255(3.573311e-43), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T0.W, T37.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, T17.X,
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT * T13.W, T11.Y, literal.x, PV.W,
+; EG-NEXT: BFE_UINT T1.W, T37.X, literal.x, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), -65536(nan)
+; EG-NEXT: OR_INT * T1.W, PS, PV.W,
+; EG-NEXT: MOV * T17.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T37.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T13.Y, T11.X, literal.x, T0.W,
-; EG-NEXT: AND_INT T13.Z, T11.Y, literal.y,
-; EG-NEXT: BFE_UINT * T14.W, T11.W, literal.x, T0.W,
-; EG-NEXT: 8(1.121039e-44), 255(3.573311e-43)
-; EG-NEXT: AND_INT T13.X, T11.X, literal.x,
-; EG-NEXT: BFE_UINT T14.Y, T11.Z, literal.y, T0.W,
-; EG-NEXT: LSHR * T11.X, KC0[2].Y, literal.z,
-; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: AND_INT T14.Z, T11.W, literal.x,
-; EG-NEXT: BFE_UINT * T15.W, T12.Y, literal.y, T0.W,
-; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
-; EG-NEXT: AND_INT T14.X, T11.Z, literal.x,
-; EG-NEXT: BFE_UINT T15.Y, T12.X, literal.y, T0.W,
-; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
-; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T36.Y, PV.W, PS,
+; EG-NEXT: MOV T17.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T12.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T37.Y, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T37.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, T13.X,
+; EG-NEXT: BFE_UINT * T1.W, T37.Y, literal.x, T0.W,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T16.X, PV.W, literal.x,
-; EG-NEXT: AND_INT T15.Z, T12.Y, literal.y,
-; EG-NEXT: BFE_UINT T17.W, T12.W, literal.z, T0.W,
-; EG-NEXT: AND_INT * T15.X, T12.X, literal.y,
-; EG-NEXT: 2(2.802597e-45), 255(3.573311e-43)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T37.Y, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T17.Y, T12.Z, literal.x, T0.W,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 32(4.484155e-44)
-; EG-NEXT: LSHR T12.X, PV.W, literal.x,
-; EG-NEXT: AND_INT T17.Z, T12.W, literal.y,
-; EG-NEXT: AND_INT * T17.X, T12.Z, literal.y,
-; EG-NEXT: 2(2.802597e-45), 255(3.573311e-43)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T36.W, PV.W, PS,
+; EG-NEXT: MOV T13.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T37.Z, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T37.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: BFE_UINT * T1.W, T37.Z, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T37.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T37.Y, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T37.W, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T37.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: BFE_UINT * T1.W, T37.W, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: ALU clause starting at 120:
+; EG-NEXT: AND_INT * T2.W, T0.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T37.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T37.W, PV.W, PS,
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T32.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T35.X, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T32.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T35.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T32.X, PV.W,
+; EG-NEXT: MOV T0.Y, T33.X,
+; EG-NEXT: BFE_UINT * T1.W, T35.X, literal.x, T0.W, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T33.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T35.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T38.Y, PV.W, PS,
+; EG-NEXT: MOV T33.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T28.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T35.Y, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T28.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T35.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T28.X, PV.W,
+; EG-NEXT: MOV T0.Y, T29.X,
+; EG-NEXT: BFE_UINT * T1.W, T35.Y, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T29.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T35.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T38.W, PV.W, PS,
+; EG-NEXT: MOV T29.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T24.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T35.Z, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T24.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T35.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T24.X, PV.W,
+; EG-NEXT: MOV T0.Y, T25.X,
+; EG-NEXT: BFE_UINT * T1.W, T35.Z, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T25.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T35.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T35.Y, PV.W, PS,
+; EG-NEXT: MOV T25.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T20.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T35.W, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T20.X, PV.W,
+; EG-NEXT: ALU clause starting at 225:
+; EG-NEXT: MOV T0.Y, T20.X,
+; EG-NEXT: LSHL * T1.W, T35.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T20.X, PV.W,
+; EG-NEXT: MOV T0.Y, T21.X,
+; EG-NEXT: BFE_UINT * T0.W, T35.W, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PV.W, T0.W,
+; EG-NEXT: MOV * T21.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
-; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR * T18.X, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: LSHR T39.X, PV.W, literal.x,
+; EG-NEXT: LSHR * T40.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: LSHR T0.W, T35.W, literal.x,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 48(6.726233e-44)
+; EG-NEXT: LSHR T41.X, PS, literal.x,
+; EG-NEXT: AND_INT T0.Z, T0.Y, literal.y,
+; EG-NEXT: AND_INT T0.W, PV.W, literal.z,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w,
+; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT: 16711680(2.341805e-38), 32(4.484155e-44)
+; EG-NEXT: LSHR T42.X, PS, literal.x,
+; EG-NEXT: OR_INT * T35.W, PV.Z, PV.W,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T21.X, PV.W,
+; EG-NEXT: MOV * T36.X, T16.X,
+; EG-NEXT: MOV * T36.Z, T12.X,
+; EG-NEXT: MOV T37.X, T8.X,
+; EG-NEXT: MOV T37.Z, T4.X, BS:VEC_120/SCL_212
+; EG-NEXT: MOV * T38.X, T32.X,
+; EG-NEXT: MOV * T38.Z, T28.X,
+; EG-NEXT: MOV T35.X, T24.X,
+; EG-NEXT: MOV * T35.Z, T20.X, BS:VEC_120/SCL_212
;
; GFX12-LABEL: constant_zextload_v32i8_to_v32i16:
; GFX12: ; %bb.0:
@@ -11642,60 +12281,331 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(ptr addrspace(1) %o
;
; EG-LABEL: constant_sextload_v32i8_to_v32i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 1 @8
-; EG-NEXT: ALU 39, @13, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1
+; EG-NEXT: ALU 1, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @10
+; EG-NEXT: ALU 104, @16, KC0[], KC1[]
+; EG-NEXT: ALU 104, @121, KC0[], KC1[]
+; EG-NEXT: ALU 95, @226, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T42.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 8:
-; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
-; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
-; EG-NEXT: ALU clause starting at 12:
-; EG-NEXT: MOV * T11.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 13:
-; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
-; EG-NEXT: LSHR T14.X, PV.W, literal.x,
-; EG-NEXT: BFE_INT * T15.Z, T11.Y, 0.0, literal.y,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
-; EG-NEXT: BFE_INT T15.X, T11.X, 0.0, literal.x,
-; EG-NEXT: LSHR T0.Y, T12.W, literal.x,
-; EG-NEXT: BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: LSHR T0.W, T12.Y, literal.x,
-; EG-NEXT: LSHR * T1.W, T11.Y, literal.x,
+; EG-NEXT: Fetch clause starting at 10:
+; EG-NEXT: VTX_READ_128 T37.XYZW, T35.X, 16, #1
+; EG-NEXT: VTX_READ_128 T35.XYZW, T35.X, 0, #1
+; EG-NEXT: ALU clause starting at 14:
+; EG-NEXT: MOV * T0.Y, T16.X,
+; EG-NEXT: MOV * T35.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 16:
+; EG-NEXT: BFE_INT * T0.W, T37.X, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T16.X, T11.Z, 0.0, literal.x,
-; EG-NEXT: LSHR T1.Y, T11.W, literal.x,
-; EG-NEXT: BFE_INT T17.Z, T12.Y, 0.0, literal.x,
-; EG-NEXT: BFE_INT T15.W, PS, 0.0, literal.x,
-; EG-NEXT: LSHR * T1.W, T11.X, literal.x,
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T37.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T17.X, T12.X, 0.0, literal.x,
-; EG-NEXT: BFE_INT T15.Y, PS, 0.0, literal.x,
-; EG-NEXT: BFE_INT T18.Z, T12.W, 0.0, literal.x,
-; EG-NEXT: BFE_INT T16.W, PV.Y, 0.0, literal.x,
-; EG-NEXT: LSHR * T1.W, T11.Z, literal.x,
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, T17.X,
+; EG-NEXT: LSHR * T0.W, T37.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T17.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T37.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T36.Y, PV.W, PS,
+; EG-NEXT: MOV T17.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T12.X,
+; EG-NEXT: BFE_INT * T0.W, T37.Y, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T18.X, T12.Z, 0.0, literal.x,
-; EG-NEXT: BFE_INT T16.Y, PS, 0.0, literal.x,
-; EG-NEXT: LSHR T0.Z, T12.X, literal.x,
-; EG-NEXT: BFE_INT T17.W, T0.W, 0.0, literal.x,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 32(4.484155e-44)
-; EG-NEXT: LSHR T11.X, PS, literal.x,
-; EG-NEXT: BFE_INT T17.Y, PV.Z, 0.0, literal.y,
-; EG-NEXT: LSHR T0.Z, T12.Z, literal.y,
-; EG-NEXT: BFE_INT T18.W, T0.Y, 0.0, literal.y,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
-; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T12.X, PS, literal.x,
-; EG-NEXT: BFE_INT * T18.Y, PV.Z, 0.0, literal.y,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T37.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, T13.X,
+; EG-NEXT: LSHR * T0.W, T37.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T37.Y, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T36.W, PV.W, PS,
+; EG-NEXT: MOV T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, T8.X,
+; EG-NEXT: BFE_INT * T0.W, T37.Z, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T37.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: LSHR * T0.W, T37.Z, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T37.Z, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: ALU clause starting at 121:
+; EG-NEXT: OR_INT * T37.Y, T1.W, T0.W,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T4.X,
+; EG-NEXT: BFE_INT * T0.W, T37.W, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T37.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: LSHR * T0.W, T37.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T37.W, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T37.W, PV.W, PS,
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, T32.X,
+; EG-NEXT: BFE_INT * T0.W, T35.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T32.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T35.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T32.X, PV.W,
+; EG-NEXT: MOV T0.Y, T33.X,
+; EG-NEXT: LSHR * T0.W, T35.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T33.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T35.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T38.Y, PV.W, PS,
+; EG-NEXT: MOV T33.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T28.X,
+; EG-NEXT: BFE_INT * T0.W, T35.Y, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T28.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T35.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T28.X, PV.W,
+; EG-NEXT: MOV T0.Y, T29.X,
+; EG-NEXT: LSHR * T0.W, T35.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T29.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T35.Y, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: ALU clause starting at 226:
+; EG-NEXT: AND_INT T1.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, T0.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T38.W, PV.W, PS,
+; EG-NEXT: MOV T29.X, PV.W,
+; EG-NEXT: MOV T0.Y, T24.X,
+; EG-NEXT: BFE_INT * T0.W, T35.Z, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T24.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T35.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T24.X, PV.W,
+; EG-NEXT: MOV T0.Y, T25.X,
+; EG-NEXT: LSHR * T0.W, T35.Z, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T25.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T35.Z, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T35.Y, PV.W, PS,
+; EG-NEXT: MOV T25.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T20.X,
+; EG-NEXT: BFE_INT * T0.W, T35.W, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T20.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T35.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T20.X, PV.W,
+; EG-NEXT: MOV T0.Y, T21.X,
+; EG-NEXT: LSHR * T0.W, T35.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T21.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: LSHR T39.X, PV.W, literal.x,
+; EG-NEXT: LSHR * T40.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: ASHR T0.W, T35.W, literal.x,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT: 24(3.363116e-44), 48(6.726233e-44)
+; EG-NEXT: LSHR T41.X, PS, literal.x,
+; EG-NEXT: AND_INT T0.Z, T0.Y, literal.y,
+; EG-NEXT: LSHL T0.W, PV.W, literal.z,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w,
+; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT: 16(2.242078e-44), 32(4.484155e-44)
+; EG-NEXT: LSHR T42.X, PS, literal.x,
+; EG-NEXT: OR_INT * T35.W, PV.Z, PV.W,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T21.X, PV.W,
+; EG-NEXT: MOV * T36.X, T16.X,
+; EG-NEXT: MOV * T36.Z, T12.X,
+; EG-NEXT: MOV T37.X, T8.X,
+; EG-NEXT: MOV T37.Z, T4.X, BS:VEC_120/SCL_212
+; EG-NEXT: MOV * T38.X, T32.X,
+; EG-NEXT: MOV * T38.Z, T28.X,
+; EG-NEXT: MOV T35.X, T24.X,
+; EG-NEXT: MOV * T35.Z, T20.X, BS:VEC_120/SCL_212
;
; GFX12-LABEL: constant_sextload_v32i8_to_v32i16:
; GFX12: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
index 3753737d251e4..ff5b9aadc87fb 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
@@ -263,63 +263,74 @@ define amdgpu_kernel void @global_load_v3i16(ptr addrspace(1) %out, ptr addrspac
;
; EG-LABEL: global_load_v3i16:
; EG: ; %bb.0: ; %entry
-; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 1 @6
-; EG-NEXT: ALU 14, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T3.X, 0
-; EG-NEXT: MEM_RAT MSKOR T2.XW, T0.X
+; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 2 @6
+; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
+; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_16 T1.X, T0.X, 0, #1
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 4, #1
-; EG-NEXT: ALU clause starting at 10:
-; EG-NEXT: MOV * T0.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 11:
+; EG-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1
+; EG-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1
+; EG-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: MOV * T5.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 13:
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T2.W, T0.X, literal.y,
+; EG-NEXT: AND_INT * T2.W, T5.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
-; EG-NEXT: LSHL T2.X, T2.W, PV.W,
-; EG-NEXT: LSHL * T2.W, literal.x, PV.W,
+; EG-NEXT: LSHL T5.X, T2.W, PV.W,
+; EG-NEXT: LSHL * T5.W, literal.x, PV.W,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: MOV T5.Y, 0.0,
+; EG-NEXT: MOV * T5.Z, 0.0,
+; EG-NEXT: LSHR T8.X, T0.W, literal.x,
+; EG-NEXT: LSHL T0.W, T7.X, literal.y,
+; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
+; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MOV T2.Y, 0.0,
-; EG-NEXT: MOV * T2.Z, 0.0,
-; EG-NEXT: LSHR T0.X, T0.W, literal.x,
-; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT T6.X, PV.W, PS,
+; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: global_load_v3i16:
; CM: ; %bb.0: ; %entry
-; CM-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
-; CM-NEXT: TEX 1 @6
-; CM-NEXT: ALU 15, @11, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT MSKOR T2.XW, T3.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 2 @6
+; CM-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT MSKOR T5.XW, T8.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6.X, T7.X
; CM-NEXT: CF_END
; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_16 T1.X, T0.X, 0, #1
-; CM-NEXT: VTX_READ_16 T0.X, T0.X, 4, #1
-; CM-NEXT: ALU clause starting at 10:
-; CM-NEXT: MOV * T0.X, KC0[2].Z,
-; CM-NEXT: ALU clause starting at 11:
+; CM-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1
+; CM-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1
+; CM-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1
+; CM-NEXT: ALU clause starting at 12:
+; CM-NEXT: MOV * T5.X, KC0[2].Z,
+; CM-NEXT: ALU clause starting at 13:
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; CM-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; CM-NEXT: AND_INT * T1.W, PV.W, literal.x,
; CM-NEXT: 3(4.203895e-45), 0(0.000000e+00)
-; CM-NEXT: AND_INT T0.Z, T0.X, literal.x,
+; CM-NEXT: AND_INT T0.Z, T5.X, literal.x,
; CM-NEXT: LSHL * T1.W, PV.W, literal.y,
; CM-NEXT: 65535(9.183409e-41), 3(4.203895e-45)
-; CM-NEXT: LSHL T2.X, PV.Z, PV.W,
-; CM-NEXT: LSHL * T2.W, literal.x, PV.W,
+; CM-NEXT: LSHL T5.X, PV.Z, PV.W,
+; CM-NEXT: LSHL * T5.W, literal.x, PV.W,
; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; CM-NEXT: MOV T2.Y, 0.0,
-; CM-NEXT: MOV * T2.Z, 0.0,
-; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; CM-NEXT: MOV T5.Y, 0.0,
+; CM-NEXT: MOV * T5.Z, 0.0,
+; CM-NEXT: LSHL T0.Z, T7.X, literal.x,
+; CM-NEXT: AND_INT * T1.W, T6.X, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T6.X, PV.Z, PV.W,
+; CM-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: LSHR * T3.X, T0.W, literal.x,
+; CM-NEXT: LSHR * T8.X, T0.W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
%ld = load <3 x i16>, ptr addrspace(1) %in
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i8.ll b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
index 5bc02c4d63181..6a39df95f6aba 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
@@ -9887,46 +9887,97 @@ define amdgpu_kernel void @global_zextload_v4i8_to_v4i16(ptr addrspace(1) %out,
;
; EG-LABEL: global_zextload_v4i8_to_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 6, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1
+; EG-NEXT: ALU 31, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XY, T7.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
+; EG-NEXT: VTX_READ_32 T7.X, T7.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T4.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: MOV * T7.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: AND_INT T0.W, T7.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 255(3.573311e-43), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T0.W, T7.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT * T4.Y, T4.X, literal.x, PV.W,
+; EG-NEXT: BFE_UINT T0.W, T7.X, literal.x, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T4.X, T4.X, literal.x,
-; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y,
-; EG-NEXT: 255(3.573311e-43), 2(2.802597e-45)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T8.Y, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.Y,
+; EG-NEXT: MOV * T8.X, T4.X,
;
; CM-LABEL: global_zextload_v4i8_to_v4i16:
; CM: ; %bb.0:
-; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; CM-NEXT: TEX 0 @6
-; CM-NEXT: ALU 7, @9, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4, T5.X
+; CM-NEXT: ALU 31, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T7.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
+; CM-NEXT: VTX_READ_32 T7.X, T7.X, 0, #1
; CM-NEXT: ALU clause starting at 8:
-; CM-NEXT: MOV * T4.X, KC0[2].Z,
-; CM-NEXT: ALU clause starting at 9:
+; CM-NEXT: MOV * T0.Y, T4.X,
+; CM-NEXT: MOV * T7.X, KC0[2].Z,
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: AND_INT T0.Z, T7.X, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 255(3.573311e-43), -65536(nan)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV * T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T0.W, T7.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, T5.X,
; CM-NEXT: MOV * T0.W, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_UINT * T4.Y, T4.X, literal.x, PV.W,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T0.W, T7.X, literal.y, PV.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T7.X, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: AND_INT * T4.X, T4.X, literal.x,
-; CM-NEXT: 255(3.573311e-43), 0(0.000000e+00)
-; CM-NEXT: LSHR * T5.X, KC0[2].Y, literal.x,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
+; CM-NEXT: OR_INT * T8.Y, PV.Z, PV.W,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOV * T5.X, PV.Y,
+; CM-NEXT: MOV * T8.X, T4.X,
%load = load <4 x i8>, ptr addrspace(1) %in
%ext = zext <4 x i8> %load to <4 x i16>
store <4 x i16> %ext, ptr addrspace(1) %out
@@ -10017,43 +10068,109 @@ define amdgpu_kernel void @global_sextload_v4i8_to_v4i16(ptr addrspace(1) %out,
;
; EG-LABEL: global_sextload_v4i8_to_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1
+; EG-NEXT: ALU 37, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XY, T7.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
+; EG-NEXT: VTX_READ_32 T7.X, T7.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T4.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
-; EG-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x,
-; EG-NEXT: LSHR T0.W, T4.X, literal.x,
-; EG-NEXT: LSHR * T4.X, KC0[2].Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 2(2.802597e-45)
-; EG-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.x,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: MOV * T7.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: LSHR * T0.W, T7.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T7.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T8.Y, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.Y,
+; EG-NEXT: MOV * T8.X, T4.X,
;
; CM-LABEL: global_sextload_v4i8_to_v4i16:
; CM: ; %bb.0:
-; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; CM-NEXT: TEX 0 @6
-; CM-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T4.X
+; CM-NEXT: ALU 37, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T7.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
+; CM-NEXT: VTX_READ_32 T7.X, T7.X, 0, #1
; CM-NEXT: ALU clause starting at 8:
-; CM-NEXT: MOV * T4.X, KC0[2].Z,
-; CM-NEXT: ALU clause starting at 9:
-; CM-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x,
-; CM-NEXT: LSHR * T0.W, T4.X, literal.x,
+; CM-NEXT: MOV * T0.Y, T4.X,
+; CM-NEXT: MOV * T7.X, KC0[2].Z,
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T4.X, KC0[2].Y, literal.x,
-; CM-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.y,
-; CM-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; CM-NEXT: AND_INT T0.Z, PV.W, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 65535(9.183409e-41), -65536(nan)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV * T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T7.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, T5.X,
+; CM-NEXT: LSHR * T0.W, T7.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T7.X, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
+; CM-NEXT: OR_INT * T8.Y, PV.Z, PV.W,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOV * T5.X, PV.Y,
+; CM-NEXT: MOV * T8.X, T4.X,
%load = load <4 x i8>, ptr addrspace(1) %in
%ext = sext <4 x i8> %load to <4 x i16>
store <4 x i16> %ext, ptr addrspace(1) %out
@@ -10158,52 +10275,156 @@ define amdgpu_kernel void @global_zextload_v8i8_to_v8i16(ptr addrspace(1) %out,
;
; EG-LABEL: global_zextload_v8i8_to_v8i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 9, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1
+; EG-NEXT: ALU 61, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T11.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
+; EG-NEXT: VTX_READ_64 T11.XY, T11.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T5.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: MOV * T11.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: AND_INT T0.W, T11.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 255(3.573311e-43), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T0.W, T11.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT * T6.W, T5.Y, literal.x, PV.W,
+; EG-NEXT: BFE_UINT T1.W, T11.X, literal.x, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), -65536(nan)
+; EG-NEXT: OR_INT * T1.W, PS, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T11.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T6.Y, T5.X, literal.x, T0.W,
-; EG-NEXT: AND_INT * T6.Z, T5.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 255(3.573311e-43)
-; EG-NEXT: AND_INT T6.X, T5.X, literal.x,
-; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y,
-; EG-NEXT: 255(3.573311e-43), 2(2.802597e-45)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T12.Y, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T11.Y, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T11.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: BFE_UINT * T0.W, T11.Y, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PV.W, T0.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T12.W, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T12.X, T8.X,
+; EG-NEXT: MOV * T12.Z, T4.X,
;
; CM-LABEL: global_zextload_v8i8_to_v8i16:
; CM: ; %bb.0:
-; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; CM-NEXT: TEX 0 @6
-; CM-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6, T5.X
+; CM-NEXT: ALU 60, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T11.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
+; CM-NEXT: VTX_READ_64 T11.XY, T11.X, 0, #1
; CM-NEXT: ALU clause starting at 8:
-; CM-NEXT: MOV * T5.X, KC0[2].Z,
-; CM-NEXT: ALU clause starting at 9:
+; CM-NEXT: MOV * T0.Y, T8.X,
+; CM-NEXT: MOV * T11.X, KC0[2].Z,
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: AND_INT T0.Z, T11.X, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 255(3.573311e-43), -65536(nan)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV * T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T0.W, T11.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, T9.X,
; CM-NEXT: MOV * T0.W, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_UINT * T6.W, T5.Y, literal.x, PV.W,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T11.X, literal.y, PV.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T9.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T11.X, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_UINT T6.Y, T5.X, literal.x, T0.W,
-; CM-NEXT: AND_INT * T6.Z, T5.Y, literal.y,
-; CM-NEXT: 8(1.121039e-44), 255(3.573311e-43)
-; CM-NEXT: AND_INT * T6.X, T5.X, literal.x,
-; CM-NEXT: 255(3.573311e-43), 0(0.000000e+00)
-; CM-NEXT: LSHR * T5.X, KC0[2].Y, literal.x,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T12.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T9.X, PV.Y,
+; CM-NEXT: MOV * T0.Y, T4.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T11.Y, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T11.Y, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T5.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T0.W, T11.Y, literal.y, T0.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
+; CM-NEXT: OR_INT * T12.W, PV.Z, PV.W,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T12.X, T8.X,
+; CM-NEXT: MOV * T12.Z, T4.X, BS:VEC_120/SCL_212
%load = load <8 x i8>, ptr addrspace(1) %in
%ext = zext <8 x i8> %load to <8 x i16>
store <8 x i16> %ext, ptr addrspace(1) %out
@@ -10344,53 +10565,183 @@ define amdgpu_kernel void @global_sextload_v8i8_to_v8i16(ptr addrspace(1) %out,
;
; EG-LABEL: global_sextload_v8i8_to_v8i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1
+; EG-NEXT: ALU 74, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T11.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
+; EG-NEXT: VTX_READ_64 T11.XY, T11.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T5.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
-; EG-NEXT: BFE_INT * T6.Z, T5.Y, 0.0, literal.x,
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: MOV * T11.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: BFE_INT * T0.W, T11.X, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T6.X, T5.X, 0.0, literal.x,
-; EG-NEXT: LSHR * T0.W, T5.Y, literal.x,
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T11.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T6.W, PV.W, 0.0, literal.x,
-; EG-NEXT: LSHR * T0.W, T5.X, literal.x,
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: LSHR * T0.W, T11.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T11.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T12.Y, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T4.X,
+; EG-NEXT: BFE_INT * T0.W, T11.Y, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T5.X, KC0[2].Y, literal.x,
-; EG-NEXT: BFE_INT * T6.Y, PS, 0.0, literal.y,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T11.Y, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T12.W, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T12.X, T8.X,
+; EG-NEXT: MOV * T12.Z, T4.X,
;
; CM-LABEL: global_sextload_v8i8_to_v8i16:
; CM: ; %bb.0:
-; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; CM-NEXT: TEX 0 @6
-; CM-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6, T5.X
+; CM-NEXT: ALU 74, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T11.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
+; CM-NEXT: VTX_READ_64 T11.XY, T11.X, 0, #1
; CM-NEXT: ALU clause starting at 8:
-; CM-NEXT: MOV * T5.X, KC0[2].Z,
-; CM-NEXT: ALU clause starting at 9:
-; CM-NEXT: BFE_INT * T6.Z, T5.Y, 0.0, literal.x,
+; CM-NEXT: MOV * T0.Y, T8.X,
+; CM-NEXT: MOV * T11.X, KC0[2].Z,
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: BFE_INT * T0.W, T11.X, 0.0, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_INT T6.X, T5.X, 0.0, literal.x,
-; CM-NEXT: LSHR * T0.W, T5.Y, literal.x,
+; CM-NEXT: AND_INT T0.Z, PV.W, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 65535(9.183409e-41), -65536(nan)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV * T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T11.X, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T0.Z, T5.X, literal.x,
-; CM-NEXT: BFE_INT * T6.W, PV.W, 0.0, literal.x,
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T5.X, KC0[2].Y, literal.x,
-; CM-NEXT: BFE_INT * T6.Y, PV.Z, 0.0, literal.y,
-; CM-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, T9.X,
+; CM-NEXT: LSHR * T0.W, T11.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T9.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T11.X, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T12.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T9.X, PV.Y,
+; CM-NEXT: MOV T0.Y, T4.X,
+; CM-NEXT: BFE_INT * T0.W, T11.Y, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, T5.X,
+; CM-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T11.Y, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
+; CM-NEXT: OR_INT * T12.W, PV.Z, PV.W,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T12.X, T8.X,
+; CM-NEXT: MOV * T12.Z, T4.X, BS:VEC_120/SCL_212
%load = load <8 x i8>, ptr addrspace(1) %in
%ext = sext <8 x i8> %load to <8 x i16>
store <8 x i16> %ext, ptr addrspace(1) %out
@@ -10547,71 +10898,287 @@ define amdgpu_kernel void @global_zextload_v16i8_to_v16i16(ptr addrspace(1) %out
;
; EG-LABEL: global_zextload_v16i8_to_v16i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
+; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @8
+; EG-NEXT: ALU 103, @12, KC0[], KC1[]
+; EG-NEXT: ALU 20, @116, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T22.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T21.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
-; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T7.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.Y, T16.X,
+; EG-NEXT: MOV * T19.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: AND_INT T0.W, T19.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 255(3.573311e-43), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T0.W, T19.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, T17.X,
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT * T8.W, T7.Y, literal.x, PV.W,
+; EG-NEXT: BFE_UINT T1.W, T19.X, literal.x, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), -65536(nan)
+; EG-NEXT: OR_INT * T1.W, PS, PV.W,
+; EG-NEXT: MOV * T17.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T19.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T8.Y, T7.X, literal.x, T0.W,
-; EG-NEXT: AND_INT T8.Z, T7.Y, literal.y,
-; EG-NEXT: BFE_UINT * T9.W, T7.W, literal.x, T0.W,
-; EG-NEXT: 8(1.121039e-44), 255(3.573311e-43)
-; EG-NEXT: AND_INT T8.X, T7.X, literal.x,
-; EG-NEXT: BFE_UINT T9.Y, T7.Z, literal.y, T0.W,
-; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.z,
-; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: AND_INT * T9.Z, T7.W, literal.x,
-; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
-; EG-NEXT: AND_INT T9.X, T7.Z, literal.x,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT: 255(3.573311e-43), 16(2.242078e-44)
-; EG-NEXT: LSHR * T10.X, PV.W, literal.x,
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T20.Y, PV.W, PS,
+; EG-NEXT: MOV T17.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T12.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T19.Y, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T19.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, T13.X,
+; EG-NEXT: BFE_UINT * T1.W, T19.Y, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T19.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T20.W, PV.W, PS,
+; EG-NEXT: MOV T13.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T19.Z, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T19.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: BFE_UINT * T1.W, T19.Z, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T19.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T19.Y, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T19.W, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T19.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: BFE_UINT * T0.W, T19.W, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: ALU clause starting at 116:
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PV.W, T0.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR T0.W, T19.W, literal.x,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44)
+; EG-NEXT: LSHR T21.X, PS, literal.x,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.y,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.z,
+; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT: 16711680(2.341805e-38), 0(0.000000e+00)
+; EG-NEXT: LSHR T22.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T19.W, PV.W, PS,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T20.X, T16.X,
+; EG-NEXT: MOV * T20.Z, T12.X,
+; EG-NEXT: MOV T19.X, T8.X,
+; EG-NEXT: MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
;
; CM-LABEL: global_zextload_v16i8_to_v16i16:
; CM: ; %bb.0:
-; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
-; CM-NEXT: TEX 0 @6
-; CM-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T9, T7.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T10.X
+; CM-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 0 @8
+; CM-NEXT: ALU 101, @12, KC0[], KC1[]
+; CM-NEXT: ALU 20, @114, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T19, T22.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T20, T21.X
; CM-NEXT: CF_END
-; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
-; CM-NEXT: ALU clause starting at 8:
-; CM-NEXT: MOV * T7.X, KC0[2].Z,
-; CM-NEXT: ALU clause starting at 9:
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 8:
+; CM-NEXT: VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: MOV * T0.Y, T16.X,
+; CM-NEXT: MOV * T19.X, KC0[2].Z,
+; CM-NEXT: ALU clause starting at 12:
+; CM-NEXT: AND_INT T0.Z, T19.X, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 255(3.573311e-43), -65536(nan)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV * T16.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T0.W, T19.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T16.X, PV.W,
+; CM-NEXT: MOV T0.Y, T17.X,
; CM-NEXT: MOV * T0.W, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_UINT * T8.W, T7.W, literal.x, PV.W,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T19.X, literal.y, PV.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T17.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T19.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T20.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T17.X, PV.Y,
+; CM-NEXT: MOV * T0.Y, T12.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T19.Y, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T12.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T19.Y, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_UINT T8.Y, T7.Z, literal.x, T0.W,
-; CM-NEXT: AND_INT T8.Z, T7.W, literal.y,
-; CM-NEXT: BFE_UINT * T9.W, T7.Y, literal.x, T0.W,
-; CM-NEXT: 8(1.121039e-44), 255(3.573311e-43)
-; CM-NEXT: AND_INT T8.X, T7.Z, literal.x,
-; CM-NEXT: BFE_UINT T9.Y, T7.X, literal.y, T0.W,
-; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
-; CM-NEXT: 255(3.573311e-43), 8(1.121039e-44)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T12.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T13.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T19.Y, literal.y, T0.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T13.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T19.Y, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T20.W, PV.Z, PV.W,
+; CM-NEXT: MOV T13.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T8.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T19.Z, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T19.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T8.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T9.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T19.Z, literal.y, T0.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T9.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T19.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T19.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T9.X, PV.Y,
+; CM-NEXT: MOV * T0.Y, T4.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T19.W, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T19.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T5.X,
+; CM-NEXT: AND_INT * T0.Z, PV.Y, literal.x,
+; CM-NEXT: -65536(nan), 0(0.000000e+00)
+; CM-NEXT: ALU clause starting at 114:
+; CM-NEXT: BFE_UINT * T0.W, T19.W, literal.x, T0.W,
; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T10.X, PV.W, literal.x,
-; CM-NEXT: AND_INT * T9.Z, T7.Y, literal.y,
-; CM-NEXT: 2(2.802597e-45), 255(3.573311e-43)
-; CM-NEXT: AND_INT * T9.X, T7.X, literal.x,
-; CM-NEXT: 255(3.573311e-43), 0(0.000000e+00)
-; CM-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
+; CM-NEXT: OR_INT * T0.W, T0.Z, PV.W,
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T19.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR T21.X, KC0[2].Y, literal.x,
+; CM-NEXT: AND_INT T0.Y, PV.Y, literal.y,
+; CM-NEXT: AND_INT T0.Z, PV.W, literal.z,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.w,
+; CM-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; CM-NEXT: 16711680(2.341805e-38), 16(2.242078e-44)
+; CM-NEXT: LSHR T22.X, PV.W, literal.x,
+; CM-NEXT: OR_INT * T19.W, PV.Y, PV.Z,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T20.X, T16.X,
+; CM-NEXT: MOV * T20.Z, T12.X, BS:VEC_120/SCL_212
+; CM-NEXT: MOV T19.X, T8.X,
+; CM-NEXT: MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
%load = load <16 x i8>, ptr addrspace(1) %in
%ext = zext <16 x i8> %load to <16 x i16>
store <16 x i16> %ext, ptr addrspace(1) %out
@@ -10844,72 +11411,343 @@ define amdgpu_kernel void @global_sextload_v16i8_to_v16i16(ptr addrspace(1) %out
;
; EG-LABEL: global_sextload_v16i8_to_v16i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 20, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
+; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @8
+; EG-NEXT: ALU 104, @12, KC0[], KC1[]
+; EG-NEXT: ALU 46, @117, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T22.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T21.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
-; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T7.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
-; EG-NEXT: BFE_INT * T8.Z, T7.Y, 0.0, literal.x,
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.Y, T16.X,
+; EG-NEXT: MOV * T19.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: BFE_INT * T0.W, T19.X, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T8.X, T7.X, 0.0, literal.x,
-; EG-NEXT: BFE_INT T9.Z, T7.W, 0.0, literal.x,
-; EG-NEXT: LSHR * T0.W, T7.Y, literal.x,
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T19.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T9.X, T7.Z, 0.0, literal.x,
-; EG-NEXT: LSHR T0.Z, T7.W, literal.x,
-; EG-NEXT: BFE_INT T8.W, PV.W, 0.0, literal.x,
-; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, T17.X,
+; EG-NEXT: LSHR * T0.W, T19.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T17.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T19.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T20.Y, PV.W, PS,
+; EG-NEXT: MOV T17.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T12.X,
+; EG-NEXT: BFE_INT * T0.W, T19.Y, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
-; EG-NEXT: BFE_INT T8.Y, PS, 0.0, literal.y,
-; EG-NEXT: LSHR T1.Z, T7.Z, literal.y,
-; EG-NEXT: BFE_INT T9.W, PV.Z, 0.0, literal.y,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T19.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T10.X, PS, literal.x,
-; EG-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, T13.X,
+; EG-NEXT: LSHR * T0.W, T19.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T19.Y, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T20.W, PV.W, PS,
+; EG-NEXT: MOV T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, T8.X,
+; EG-NEXT: BFE_INT * T0.W, T19.Z, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T19.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: LSHR * T0.W, T19.Z, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T19.Z, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: ALU clause starting at 117:
+; EG-NEXT: OR_INT * T19.Y, T1.W, T0.W,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T4.X,
+; EG-NEXT: BFE_INT * T0.W, T19.W, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T19.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: LSHR * T0.W, T19.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR T0.W, T19.W, literal.x,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT: 24(3.363116e-44), 16(2.242078e-44)
+; EG-NEXT: LSHR T21.X, PS, literal.x,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.y,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.z,
+; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: LSHR T22.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T19.W, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T20.X, T16.X,
+; EG-NEXT: MOV * T20.Z, T12.X,
+; EG-NEXT: MOV T19.X, T8.X,
+; EG-NEXT: MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
;
; CM-LABEL: global_sextload_v16i8_to_v16i16:
; CM: ; %bb.0:
-; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
-; CM-NEXT: TEX 0 @6
-; CM-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T9, T7.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T10.X
+; CM-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 0 @8
+; CM-NEXT: ALU 104, @12, KC0[], KC1[]
+; CM-NEXT: ALU 46, @117, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T19, T22.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T20, T21.X
; CM-NEXT: CF_END
-; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
-; CM-NEXT: ALU clause starting at 8:
-; CM-NEXT: MOV * T7.X, KC0[2].Z,
-; CM-NEXT: ALU clause starting at 9:
-; CM-NEXT: BFE_INT * T8.Z, T7.W, 0.0, literal.x,
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 8:
+; CM-NEXT: VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: MOV * T0.Y, T16.X,
+; CM-NEXT: MOV * T19.X, KC0[2].Z,
+; CM-NEXT: ALU clause starting at 12:
+; CM-NEXT: BFE_INT * T0.W, T19.X, 0.0, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_INT T8.X, T7.Z, 0.0, literal.x,
-; CM-NEXT: LSHR T0.Y, T7.Y, literal.x,
-; CM-NEXT: BFE_INT T9.Z, T7.Y, 0.0, literal.x,
-; CM-NEXT: LSHR * T0.W, T7.W, literal.x,
+; CM-NEXT: AND_INT T0.Z, PV.W, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 65535(9.183409e-41), -65536(nan)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV * T16.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T19.X, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_INT T9.X, T7.X, 0.0, literal.x,
-; CM-NEXT: LSHR T1.Y, T7.Z, literal.x,
-; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y,
-; CM-NEXT: BFE_INT * T8.W, PV.W, 0.0, literal.x,
-; CM-NEXT: 8(1.121039e-44), 16(2.242078e-44)
-; CM-NEXT: LSHR T10.X, PV.Z, literal.x,
-; CM-NEXT: BFE_INT T8.Y, PV.Y, 0.0, literal.y,
-; CM-NEXT: LSHR T0.Z, T7.X, literal.y,
-; CM-NEXT: BFE_INT * T9.W, T0.Y, 0.0, literal.y,
-; CM-NEXT: 2(2.802597e-45), 8(1.121039e-44)
-; CM-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
-; CM-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
-; CM-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T16.X, PV.W,
+; CM-NEXT: MOV T0.Y, T17.X,
+; CM-NEXT: LSHR * T0.W, T19.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T17.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T19.X, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T20.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T17.X, PV.Y,
+; CM-NEXT: MOV T0.Y, T12.X,
+; CM-NEXT: BFE_INT * T0.W, T19.Y, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T12.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T19.Y, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T12.X, PV.W,
+; CM-NEXT: MOV T0.Y, T13.X,
+; CM-NEXT: LSHR * T0.W, T19.Y, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T13.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T19.Y, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T20.W, PV.Z, PV.W,
+; CM-NEXT: MOV T13.X, PV.W,
+; CM-NEXT: MOV T0.Y, T8.X,
+; CM-NEXT: BFE_INT * T0.W, T19.Z, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T19.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, T9.X,
+; CM-NEXT: LSHR * T0.W, T19.Z, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T9.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T19.Z, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: ALU clause starting at 117:
+; CM-NEXT: OR_INT * T19.Y, T0.Z, T0.W,
+; CM-NEXT: MOV T9.X, PV.Y,
+; CM-NEXT: MOV T0.Y, T4.X,
+; CM-NEXT: BFE_INT * T0.W, T19.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T19.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, T5.X,
+; CM-NEXT: LSHR * T0.W, T19.W, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T19.W, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR T21.X, KC0[2].Y, literal.x,
+; CM-NEXT: AND_INT T0.Y, PV.Y, literal.y,
+; CM-NEXT: LSHL T0.Z, PV.W, literal.z,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
+; CM-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR T22.X, PV.W, literal.x,
+; CM-NEXT: OR_INT * T19.W, PV.Y, PV.Z,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T20.X, T16.X,
+; CM-NEXT: MOV * T20.Z, T12.X, BS:VEC_120/SCL_212
+; CM-NEXT: MOV T19.X, T8.X,
+; CM-NEXT: MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
%load = load <16 x i8>, ptr addrspace(1) %in
%ext = sext <16 x i8> %load to <16 x i16>
store <16 x i16> %ext, ptr addrspace(1) %out
@@ -11181,115 +12019,543 @@ define amdgpu_kernel void @global_zextload_v32i8_to_v32i16(ptr addrspace(1) %out
;
; EG-LABEL: global_zextload_v32i8_to_v32i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 1 @8
-; EG-NEXT: ALU 37, @13, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T12.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 1
+; EG-NEXT: ALU 1, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @10
+; EG-NEXT: ALU 103, @16, KC0[], KC1[]
+; EG-NEXT: ALU 104, @120, KC0[], KC1[]
+; EG-NEXT: ALU 41, @225, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T42.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 8:
-; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
-; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
-; EG-NEXT: ALU clause starting at 12:
-; EG-NEXT: MOV * T11.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 13:
+; EG-NEXT: Fetch clause starting at 10:
+; EG-NEXT: VTX_READ_128 T37.XYZW, T35.X, 16, #1
+; EG-NEXT: VTX_READ_128 T35.XYZW, T35.X, 0, #1
+; EG-NEXT: ALU clause starting at 14:
+; EG-NEXT: MOV * T0.Y, T16.X,
+; EG-NEXT: MOV * T35.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 16:
+; EG-NEXT: AND_INT T0.W, T37.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 255(3.573311e-43), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T0.W, T37.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, T17.X,
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT * T13.W, T11.Y, literal.x, PV.W,
+; EG-NEXT: BFE_UINT T1.W, T37.X, literal.x, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), -65536(nan)
+; EG-NEXT: OR_INT * T1.W, PS, PV.W,
+; EG-NEXT: MOV * T17.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T37.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T13.Y, T11.X, literal.x, T0.W,
-; EG-NEXT: AND_INT T13.Z, T11.Y, literal.y,
-; EG-NEXT: BFE_UINT * T14.W, T11.W, literal.x, T0.W,
-; EG-NEXT: 8(1.121039e-44), 255(3.573311e-43)
-; EG-NEXT: AND_INT T13.X, T11.X, literal.x,
-; EG-NEXT: BFE_UINT T14.Y, T11.Z, literal.y, T0.W,
-; EG-NEXT: LSHR * T11.X, KC0[2].Y, literal.z,
-; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: AND_INT T14.Z, T11.W, literal.x,
-; EG-NEXT: BFE_UINT * T15.W, T12.Y, literal.y, T0.W,
-; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
-; EG-NEXT: AND_INT T14.X, T11.Z, literal.x,
-; EG-NEXT: BFE_UINT T15.Y, T12.X, literal.y, T0.W,
-; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
-; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T36.Y, PV.W, PS,
+; EG-NEXT: MOV T17.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T12.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T37.Y, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T37.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, T13.X,
+; EG-NEXT: BFE_UINT * T1.W, T37.Y, literal.x, T0.W,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T16.X, PV.W, literal.x,
-; EG-NEXT: AND_INT T15.Z, T12.Y, literal.y,
-; EG-NEXT: BFE_UINT T17.W, T12.W, literal.z, T0.W,
-; EG-NEXT: AND_INT * T15.X, T12.X, literal.y,
-; EG-NEXT: 2(2.802597e-45), 255(3.573311e-43)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T37.Y, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T17.Y, T12.Z, literal.x, T0.W,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 32(4.484155e-44)
-; EG-NEXT: LSHR T12.X, PV.W, literal.x,
-; EG-NEXT: AND_INT T17.Z, T12.W, literal.y,
-; EG-NEXT: AND_INT * T17.X, T12.Z, literal.y,
-; EG-NEXT: 2(2.802597e-45), 255(3.573311e-43)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T36.W, PV.W, PS,
+; EG-NEXT: MOV T13.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T37.Z, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T37.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: BFE_UINT * T1.W, T37.Z, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T37.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T37.Y, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T37.W, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T37.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: BFE_UINT * T1.W, T37.W, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: ALU clause starting at 120:
+; EG-NEXT: AND_INT * T2.W, T0.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T37.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T37.W, PV.W, PS,
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T32.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T35.X, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T32.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T35.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T32.X, PV.W,
+; EG-NEXT: MOV T0.Y, T33.X,
+; EG-NEXT: BFE_UINT * T1.W, T35.X, literal.x, T0.W, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T33.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T35.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T38.Y, PV.W, PS,
+; EG-NEXT: MOV T33.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T28.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T35.Y, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T28.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T35.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T28.X, PV.W,
+; EG-NEXT: MOV T0.Y, T29.X,
+; EG-NEXT: BFE_UINT * T1.W, T35.Y, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T29.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T35.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T38.W, PV.W, PS,
+; EG-NEXT: MOV T29.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T24.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T35.Z, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T24.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T35.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T24.X, PV.W,
+; EG-NEXT: MOV T0.Y, T25.X,
+; EG-NEXT: BFE_UINT * T1.W, T35.Z, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T25.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T35.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T35.Y, PV.W, PS,
+; EG-NEXT: MOV T25.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T20.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T35.W, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T20.X, PV.W,
+; EG-NEXT: ALU clause starting at 225:
+; EG-NEXT: MOV T0.Y, T20.X,
+; EG-NEXT: LSHL * T1.W, T35.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T20.X, PV.W,
+; EG-NEXT: MOV T0.Y, T21.X,
+; EG-NEXT: BFE_UINT * T0.W, T35.W, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PV.W, T0.W,
+; EG-NEXT: MOV * T21.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
-; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR * T18.X, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: LSHR T39.X, PV.W, literal.x,
+; EG-NEXT: LSHR * T40.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: LSHR T0.W, T35.W, literal.x,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 48(6.726233e-44)
+; EG-NEXT: LSHR T41.X, PS, literal.x,
+; EG-NEXT: AND_INT T0.Z, T0.Y, literal.y,
+; EG-NEXT: AND_INT T0.W, PV.W, literal.z,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w,
+; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT: 16711680(2.341805e-38), 32(4.484155e-44)
+; EG-NEXT: LSHR T42.X, PS, literal.x,
+; EG-NEXT: OR_INT * T35.W, PV.Z, PV.W,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T21.X, PV.W,
+; EG-NEXT: MOV * T36.X, T16.X,
+; EG-NEXT: MOV * T36.Z, T12.X,
+; EG-NEXT: MOV T37.X, T8.X,
+; EG-NEXT: MOV T37.Z, T4.X, BS:VEC_120/SCL_212
+; EG-NEXT: MOV * T38.X, T32.X,
+; EG-NEXT: MOV * T38.Z, T28.X,
+; EG-NEXT: MOV T35.X, T24.X,
+; EG-NEXT: MOV * T35.Z, T20.X, BS:VEC_120/SCL_212
;
; CM-LABEL: global_zextload_v32i8_to_v32i16:
; CM: ; %bb.0:
-; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
-; CM-NEXT: TEX 1 @8
-; CM-NEXT: ALU 39, @13, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T17, T12.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T11, T18.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T14, T16.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T13, T15.X
+; CM-NEXT: ALU 1, @14, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 1 @10
+; CM-NEXT: ALU 101, @16, KC0[], KC1[]
+; CM-NEXT: ALU 101, @118, KC0[], KC1[]
+; CM-NEXT: ALU 40, @220, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T36, T42.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T38, T41.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T37, T40.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T35, T39.X
; CM-NEXT: CF_END
-; CM-NEXT: Fetch clause starting at 8:
-; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1
-; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1
-; CM-NEXT: ALU clause starting at 12:
-; CM-NEXT: MOV * T11.X, KC0[2].Z,
-; CM-NEXT: ALU clause starting at 13:
+; CM-NEXT: Fetch clause starting at 10:
+; CM-NEXT: VTX_READ_128 T37.XYZW, T35.X, 16, #1
+; CM-NEXT: VTX_READ_128 T36.XYZW, T35.X, 0, #1
+; CM-NEXT: ALU clause starting at 14:
+; CM-NEXT: MOV * T0.Y, T16.X,
+; CM-NEXT: MOV * T35.X, KC0[2].Z,
+; CM-NEXT: ALU clause starting at 16:
+; CM-NEXT: AND_INT T0.Z, T37.X, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 255(3.573311e-43), -65536(nan)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV * T16.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T0.W, T37.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T16.X, PV.W,
+; CM-NEXT: MOV T0.Y, T17.X,
; CM-NEXT: MOV * T0.W, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_UINT * T13.W, T11.W, literal.x, PV.W,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T37.X, literal.y, PV.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T17.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T37.X, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_UINT T13.Y, T11.Z, literal.x, T0.W,
-; CM-NEXT: AND_INT T13.Z, T11.W, literal.y,
-; CM-NEXT: BFE_UINT * T14.W, T11.Y, literal.x, T0.W,
-; CM-NEXT: 8(1.121039e-44), 255(3.573311e-43)
-; CM-NEXT: AND_INT T13.X, T11.Z, literal.x,
-; CM-NEXT: BFE_UINT T14.Y, T11.X, literal.y, T0.W,
-; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT: 255(3.573311e-43), 8(1.121039e-44)
-; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T15.X, PV.W, literal.x,
-; CM-NEXT: AND_INT T14.Z, T11.Y, literal.y,
-; CM-NEXT: BFE_UINT * T11.W, T12.W, literal.z, T0.W,
-; CM-NEXT: 2(2.802597e-45), 255(3.573311e-43)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T35.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T17.X, PV.Y,
+; CM-NEXT: MOV * T0.Y, T12.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T37.Y, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T12.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T37.Y, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: AND_INT T14.X, T11.X, literal.x,
-; CM-NEXT: BFE_UINT T11.Y, T12.Z, literal.y, T0.W,
-; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT: 255(3.573311e-43), 8(1.121039e-44)
-; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T16.X, PV.W, literal.x,
-; CM-NEXT: AND_INT T11.Z, T12.W, literal.y,
-; CM-NEXT: BFE_UINT * T17.W, T12.Y, literal.z, T0.W,
-; CM-NEXT: 2(2.802597e-45), 255(3.573311e-43)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T12.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T13.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T37.Y, literal.y, T0.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T13.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T37.Y, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: AND_INT T11.X, T12.Z, literal.x,
-; CM-NEXT: BFE_UINT T17.Y, T12.X, literal.y, T0.W,
-; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
-; CM-NEXT: 255(3.573311e-43), 8(1.121039e-44)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T35.W, PV.Z, PV.W,
+; CM-NEXT: MOV T13.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T8.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T37.Z, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T37.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T8.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T9.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T37.Z, literal.y, T0.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T9.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T37.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T37.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T9.X, PV.Y,
+; CM-NEXT: MOV * T0.Y, T4.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T37.W, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T37.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T5.X,
+; CM-NEXT: AND_INT * T0.Z, PV.Y, literal.x,
+; CM-NEXT: -65536(nan), 0(0.000000e+00)
+; CM-NEXT: ALU clause starting at 118:
+; CM-NEXT: BFE_UINT * T1.W, T37.W, literal.x, T0.W,
; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T18.X, PV.W, literal.x,
-; CM-NEXT: AND_INT * T17.Z, T12.Y, literal.y,
-; CM-NEXT: 2(2.802597e-45), 255(3.573311e-43)
-; CM-NEXT: AND_INT * T17.X, T12.X, literal.x,
-; CM-NEXT: 255(3.573311e-43), 0(0.000000e+00)
-; CM-NEXT: LSHR * T12.X, KC0[2].Y, literal.x,
+; CM-NEXT: OR_INT * T1.W, T0.Z, PV.W,
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T37.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T37.W, PV.Z, PV.W,
+; CM-NEXT: MOV T5.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T32.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T36.X, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T32.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T36.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T32.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T33.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T36.X, literal.y, T0.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T33.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T36.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T38.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T33.X, PV.Y,
+; CM-NEXT: MOV * T0.Y, T28.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T36.Y, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T28.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T36.Y, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T28.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T29.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T36.Y, literal.y, T0.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T29.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T36.Y, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T38.W, PV.Z, PV.W,
+; CM-NEXT: MOV T29.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T24.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T36.Z, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T24.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T36.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T24.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T25.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T36.Z, literal.y, T0.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T25.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T36.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T36.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T25.X, PV.Y,
+; CM-NEXT: MOV * T0.Y, T20.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T36.W, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T20.X, PV.W,
+; CM-NEXT: ALU clause starting at 220:
+; CM-NEXT: MOV T0.Y, T20.X,
+; CM-NEXT: LSHL * T1.W, T36.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T20.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T21.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T0.W, T36.W, literal.y, T0.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T21.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
+; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR T39.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44)
+; CM-NEXT: LSHR T40.X, PV.W, literal.x,
+; CM-NEXT: LSHR * T0.W, T36.W, literal.y,
+; CM-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; CM-NEXT: LSHR T41.X, KC0[2].Y, literal.x,
+; CM-NEXT: AND_INT T0.Y, T0.Y, literal.y,
+; CM-NEXT: AND_INT T0.Z, PV.W, literal.z,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.w,
+; CM-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; CM-NEXT: 16711680(2.341805e-38), 16(2.242078e-44)
+; CM-NEXT: LSHR T42.X, PV.W, literal.x,
+; CM-NEXT: OR_INT * T36.W, PV.Y, PV.Z,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOV * T21.X, PV.W,
+; CM-NEXT: MOV T35.X, T16.X,
+; CM-NEXT: MOV * T35.Z, T12.X, BS:VEC_120/SCL_212
+; CM-NEXT: MOV T37.X, T8.X,
+; CM-NEXT: MOV * T37.Z, T4.X, BS:VEC_120/SCL_212
+; CM-NEXT: MOV T38.X, T32.X,
+; CM-NEXT: MOV * T38.Z, T28.X, BS:VEC_120/SCL_212
+; CM-NEXT: MOV T36.X, T24.X,
+; CM-NEXT: MOV * T36.Z, T20.X, BS:VEC_120/SCL_212
%load = load <32 x i8>, ptr addrspace(1) %in
%ext = zext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, ptr addrspace(1) %out
@@ -11717,118 +12983,659 @@ define amdgpu_kernel void @global_sextload_v32i8_to_v32i16(ptr addrspace(1) %out
;
; EG-LABEL: global_sextload_v32i8_to_v32i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 1 @8
-; EG-NEXT: ALU 39, @13, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1
+; EG-NEXT: ALU 1, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @10
+; EG-NEXT: ALU 104, @16, KC0[], KC1[]
+; EG-NEXT: ALU 104, @121, KC0[], KC1[]
+; EG-NEXT: ALU 95, @226, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T42.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 8:
-; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
-; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
-; EG-NEXT: ALU clause starting at 12:
-; EG-NEXT: MOV * T11.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 13:
-; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
-; EG-NEXT: LSHR T14.X, PV.W, literal.x,
-; EG-NEXT: BFE_INT * T15.Z, T11.Y, 0.0, literal.y,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
-; EG-NEXT: BFE_INT T15.X, T11.X, 0.0, literal.x,
-; EG-NEXT: LSHR T0.Y, T12.W, literal.x,
-; EG-NEXT: BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: LSHR T0.W, T12.Y, literal.x,
-; EG-NEXT: LSHR * T1.W, T11.Y, literal.x,
+; EG-NEXT: Fetch clause starting at 10:
+; EG-NEXT: VTX_READ_128 T37.XYZW, T35.X, 16, #1
+; EG-NEXT: VTX_READ_128 T35.XYZW, T35.X, 0, #1
+; EG-NEXT: ALU clause starting at 14:
+; EG-NEXT: MOV * T0.Y, T16.X,
+; EG-NEXT: MOV * T35.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 16:
+; EG-NEXT: BFE_INT * T0.W, T37.X, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T16.X, T11.Z, 0.0, literal.x,
-; EG-NEXT: LSHR T1.Y, T11.W, literal.x,
-; EG-NEXT: BFE_INT T17.Z, T12.Y, 0.0, literal.x,
-; EG-NEXT: BFE_INT T15.W, PS, 0.0, literal.x,
-; EG-NEXT: LSHR * T1.W, T11.X, literal.x,
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T37.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T17.X, T12.X, 0.0, literal.x,
-; EG-NEXT: BFE_INT T15.Y, PS, 0.0, literal.x,
-; EG-NEXT: BFE_INT T18.Z, T12.W, 0.0, literal.x,
-; EG-NEXT: BFE_INT T16.W, PV.Y, 0.0, literal.x,
-; EG-NEXT: LSHR * T1.W, T11.Z, literal.x,
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, T17.X,
+; EG-NEXT: LSHR * T0.W, T37.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T17.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T37.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T36.Y, PV.W, PS,
+; EG-NEXT: MOV T17.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T12.X,
+; EG-NEXT: BFE_INT * T0.W, T37.Y, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T18.X, T12.Z, 0.0, literal.x,
-; EG-NEXT: BFE_INT T16.Y, PS, 0.0, literal.x,
-; EG-NEXT: LSHR T0.Z, T12.X, literal.x,
-; EG-NEXT: BFE_INT T17.W, T0.W, 0.0, literal.x,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 32(4.484155e-44)
-; EG-NEXT: LSHR T11.X, PS, literal.x,
-; EG-NEXT: BFE_INT T17.Y, PV.Z, 0.0, literal.y,
-; EG-NEXT: LSHR T0.Z, T12.Z, literal.y,
-; EG-NEXT: BFE_INT T18.W, T0.Y, 0.0, literal.y,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
-; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T12.X, PS, literal.x,
-; EG-NEXT: BFE_INT * T18.Y, PV.Z, 0.0, literal.y,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T37.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, T13.X,
+; EG-NEXT: LSHR * T0.W, T37.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T37.Y, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T36.W, PV.W, PS,
+; EG-NEXT: MOV T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, T8.X,
+; EG-NEXT: BFE_INT * T0.W, T37.Z, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T37.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: LSHR * T0.W, T37.Z, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T37.Z, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: ALU clause starting at 121:
+; EG-NEXT: OR_INT * T37.Y, T1.W, T0.W,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T4.X,
+; EG-NEXT: BFE_INT * T0.W, T37.W, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T37.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: LSHR * T0.W, T37.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T37.W, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T37.W, PV.W, PS,
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, T32.X,
+; EG-NEXT: BFE_INT * T0.W, T35.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T32.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T35.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T32.X, PV.W,
+; EG-NEXT: MOV T0.Y, T33.X,
+; EG-NEXT: LSHR * T0.W, T35.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T33.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T35.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T38.Y, PV.W, PS,
+; EG-NEXT: MOV T33.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T28.X,
+; EG-NEXT: BFE_INT * T0.W, T35.Y, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T28.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T35.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T28.X, PV.W,
+; EG-NEXT: MOV T0.Y, T29.X,
+; EG-NEXT: LSHR * T0.W, T35.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T29.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T35.Y, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: ALU clause starting at 226:
+; EG-NEXT: AND_INT T1.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, T0.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T38.W, PV.W, PS,
+; EG-NEXT: MOV T29.X, PV.W,
+; EG-NEXT: MOV T0.Y, T24.X,
+; EG-NEXT: BFE_INT * T0.W, T35.Z, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T24.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T35.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T24.X, PV.W,
+; EG-NEXT: MOV T0.Y, T25.X,
+; EG-NEXT: LSHR * T0.W, T35.Z, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T25.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T35.Z, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T35.Y, PV.W, PS,
+; EG-NEXT: MOV T25.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T20.X,
+; EG-NEXT: BFE_INT * T0.W, T35.W, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T20.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T35.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T20.X, PV.W,
+; EG-NEXT: MOV T0.Y, T21.X,
+; EG-NEXT: LSHR * T0.W, T35.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T21.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: LSHR T39.X, PV.W, literal.x,
+; EG-NEXT: LSHR * T40.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: ASHR T0.W, T35.W, literal.x,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT: 24(3.363116e-44), 48(6.726233e-44)
+; EG-NEXT: LSHR T41.X, PS, literal.x,
+; EG-NEXT: AND_INT T0.Z, T0.Y, literal.y,
+; EG-NEXT: LSHL T0.W, PV.W, literal.z,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w,
+; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT: 16(2.242078e-44), 32(4.484155e-44)
+; EG-NEXT: LSHR T42.X, PS, literal.x,
+; EG-NEXT: OR_INT * T35.W, PV.Z, PV.W,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T21.X, PV.W,
+; EG-NEXT: MOV * T36.X, T16.X,
+; EG-NEXT: MOV * T36.Z, T12.X,
+; EG-NEXT: MOV T37.X, T8.X,
+; EG-NEXT: MOV T37.Z, T4.X, BS:VEC_120/SCL_212
+; EG-NEXT: MOV * T38.X, T32.X,
+; EG-NEXT: MOV * T38.Z, T28.X,
+; EG-NEXT: MOV T35.X, T24.X,
+; EG-NEXT: MOV * T35.Z, T20.X, BS:VEC_120/SCL_212
;
; CM-LABEL: global_sextload_v32i8_to_v32i16:
; CM: ; %bb.0:
-; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
-; CM-NEXT: TEX 1 @8
-; CM-NEXT: ALU 40, @13, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T17, T11.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T18.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T16, T14.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T15, T13.X
+; CM-NEXT: ALU 1, @14, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 1 @10
+; CM-NEXT: ALU 104, @16, KC0[], KC1[]
+; CM-NEXT: ALU 104, @121, KC0[], KC1[]
+; CM-NEXT: ALU 95, @226, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T35, T42.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T38, T41.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T37, T40.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T36, T39.X
; CM-NEXT: CF_END
-; CM-NEXT: Fetch clause starting at 8:
-; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
-; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
-; CM-NEXT: ALU clause starting at 12:
-; CM-NEXT: MOV * T11.X, KC0[2].Z,
-; CM-NEXT: ALU clause starting at 13:
+; CM-NEXT: Fetch clause starting at 10:
+; CM-NEXT: VTX_READ_128 T37.XYZW, T35.X, 16, #1
+; CM-NEXT: VTX_READ_128 T35.XYZW, T35.X, 0, #1
+; CM-NEXT: ALU clause starting at 14:
+; CM-NEXT: MOV * T0.Y, T16.X,
+; CM-NEXT: MOV * T35.X, KC0[2].Z,
+; CM-NEXT: ALU clause starting at 16:
+; CM-NEXT: BFE_INT * T0.W, T37.X, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.W, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 65535(9.183409e-41), -65536(nan)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV * T16.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T37.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T16.X, PV.W,
+; CM-NEXT: MOV T0.Y, T17.X,
+; CM-NEXT: LSHR * T0.W, T37.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T17.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T37.X, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T36.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T17.X, PV.Y,
+; CM-NEXT: MOV T0.Y, T12.X,
+; CM-NEXT: BFE_INT * T0.W, T37.Y, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T12.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T37.Y, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T12.X, PV.W,
+; CM-NEXT: MOV T0.Y, T13.X,
+; CM-NEXT: LSHR * T0.W, T37.Y, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T13.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T37.Y, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T36.W, PV.Z, PV.W,
+; CM-NEXT: MOV T13.X, PV.W,
+; CM-NEXT: MOV T0.Y, T8.X,
+; CM-NEXT: BFE_INT * T0.W, T37.Z, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T37.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, T9.X,
+; CM-NEXT: LSHR * T0.W, T37.Z, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T9.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T37.Z, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: ALU clause starting at 121:
+; CM-NEXT: OR_INT * T37.Y, T0.Z, T0.W,
+; CM-NEXT: MOV T9.X, PV.Y,
+; CM-NEXT: MOV T0.Y, T4.X,
+; CM-NEXT: BFE_INT * T0.W, T37.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T37.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, T5.X,
+; CM-NEXT: LSHR * T0.W, T37.W, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T37.W, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T37.W, PV.Z, PV.W,
+; CM-NEXT: MOV T5.X, PV.W,
+; CM-NEXT: MOV T0.Y, T32.X,
+; CM-NEXT: BFE_INT * T0.W, T35.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T32.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T35.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T32.X, PV.W,
+; CM-NEXT: MOV T0.Y, T33.X,
+; CM-NEXT: LSHR * T0.W, T35.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T33.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T35.X, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T38.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T33.X, PV.Y,
+; CM-NEXT: MOV T0.Y, T28.X,
+; CM-NEXT: BFE_INT * T0.W, T35.Y, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T28.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T35.Y, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T28.X, PV.W,
+; CM-NEXT: MOV T0.Y, T29.X,
+; CM-NEXT: LSHR * T0.W, T35.Y, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T29.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T35.Y, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: ALU clause starting at 226:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T0.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T38.W, PV.Z, PV.W,
+; CM-NEXT: MOV T29.X, PV.W,
+; CM-NEXT: MOV T0.Y, T24.X,
+; CM-NEXT: BFE_INT * T0.W, T35.Z, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T24.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T35.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T24.X, PV.W,
+; CM-NEXT: MOV T0.Y, T25.X,
+; CM-NEXT: LSHR * T0.W, T35.Z, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T25.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T35.Z, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T35.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T25.X, PV.Y,
+; CM-NEXT: MOV T0.Y, T20.X,
+; CM-NEXT: BFE_INT * T0.W, T35.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T20.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T35.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T20.X, PV.W,
+; CM-NEXT: MOV T0.Y, T21.X,
+; CM-NEXT: LSHR * T0.W, T35.W, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T21.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
-; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T13.X, PV.W, literal.x,
-; CM-NEXT: LSHR T0.Y, T11.Y, literal.y,
-; CM-NEXT: LSHR T0.Z, T11.Z, literal.y,
-; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
-; CM-NEXT: 2(2.802597e-45), 8(1.121039e-44)
; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T14.X, PV.W, literal.x,
-; CM-NEXT: LSHR T1.Y, T11.W, literal.y,
-; CM-NEXT: BFE_INT T15.Z, T12.W, 0.0, literal.y, BS:VEC_120/SCL_212
-; CM-NEXT: LSHR * T0.W, T12.X, literal.y,
-; CM-NEXT: 2(2.802597e-45), 8(1.121039e-44)
-; CM-NEXT: BFE_INT T15.X, T12.Z, 0.0, literal.x,
-; CM-NEXT: LSHR T2.Y, T12.Y, literal.x,
-; CM-NEXT: BFE_INT T16.Z, T12.Y, 0.0, literal.x,
-; CM-NEXT: LSHR * T1.W, T12.W, literal.x,
-; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_INT T16.X, T12.X, 0.0, literal.x,
-; CM-NEXT: LSHR T3.Y, T12.Z, literal.x,
-; CM-NEXT: BFE_INT T12.Z, T11.W, 0.0, literal.x,
-; CM-NEXT: BFE_INT * T15.W, PV.W, 0.0, literal.x,
-; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_INT T12.X, T11.Z, 0.0, literal.x,
-; CM-NEXT: BFE_INT T15.Y, PV.Y, 0.0, literal.x,
-; CM-NEXT: BFE_INT T17.Z, T11.Y, 0.0, literal.x,
-; CM-NEXT: BFE_INT * T16.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212
-; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_INT T17.X, T11.X, 0.0, literal.x,
-; CM-NEXT: BFE_INT T16.Y, T0.W, 0.0, literal.x,
-; CM-NEXT: ADD_INT T1.Z, KC0[2].Y, literal.y,
-; CM-NEXT: BFE_INT * T12.W, T1.Y, 0.0, literal.x,
-; CM-NEXT: 8(1.121039e-44), 16(2.242078e-44)
-; CM-NEXT: LSHR T18.X, PV.Z, literal.x,
-; CM-NEXT: BFE_INT T12.Y, T0.Z, 0.0, literal.y,
-; CM-NEXT: LSHR T0.Z, T11.X, literal.y,
-; CM-NEXT: BFE_INT * T17.W, T0.Y, 0.0, literal.y,
-; CM-NEXT: 2(2.802597e-45), 8(1.121039e-44)
-; CM-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
-; CM-NEXT: BFE_INT * T17.Y, PV.Z, 0.0, literal.y,
-; CM-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; CM-NEXT: LSHR T39.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44)
+; CM-NEXT: LSHR T40.X, PV.W, literal.x,
+; CM-NEXT: ASHR * T0.W, T35.W, literal.y,
+; CM-NEXT: 2(2.802597e-45), 24(3.363116e-44)
+; CM-NEXT: LSHR T41.X, KC0[2].Y, literal.x,
+; CM-NEXT: AND_INT T0.Y, T0.Y, literal.y,
+; CM-NEXT: LSHL T0.Z, PV.W, literal.z,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
+; CM-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR T42.X, PV.W, literal.x,
+; CM-NEXT: OR_INT * T35.W, PV.Y, PV.Z,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOV * T21.X, PV.W,
+; CM-NEXT: MOV T36.X, T16.X,
+; CM-NEXT: MOV * T36.Z, T12.X, BS:VEC_120/SCL_212
+; CM-NEXT: MOV T37.X, T8.X,
+; CM-NEXT: MOV * T37.Z, T4.X, BS:VEC_120/SCL_212
+; CM-NEXT: MOV T38.X, T32.X,
+; CM-NEXT: MOV * T38.Z, T28.X, BS:VEC_120/SCL_212
+; CM-NEXT: MOV T35.X, T24.X,
+; CM-NEXT: MOV * T35.Z, T20.X, BS:VEC_120/SCL_212
%load = load <32 x i8>, ptr addrspace(1) %in
%ext = sext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
index 8dcecfe291177..a209de78cd299 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
@@ -151,19 +151,27 @@ define amdgpu_kernel void @local_load_v3i16(ptr addrspace(3) %out, ptr addrspace
;
; EG-LABEL: local_load_v3i16:
; EG: ; %bb.0: ; %entry
-; EG-NEXT: ALU 11, @2, KC0[CB0:0-32], KC1[]
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x,
-; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
-; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W
-; EG-NEXT: MOV T0.X, OQAP,
+; EG-NEXT: ALU 19, @2, KC0[CB0:0-32], KC1[]
; EG-NEXT: MOV * T0.W, KC0[2].Z,
; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W
; EG-NEXT: MOV T0.Y, OQAP,
-; EG-NEXT: MOV * T0.W, KC0[2].Y,
-; EG-NEXT: LDS_WRITE * T0.W, T0.Y,
+; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W
+; EG-NEXT: MOV * T0.Z, OQAP,
+; EG-NEXT: LSHL T0.Z, PV.Z, literal.x,
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.y,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.z,
+; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
+; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T1.W
+; EG-NEXT: MOV T0.Y, OQAP,
+; EG-NEXT: OR_INT T0.W, T0.Z, T0.W,
+; EG-NEXT: MOV * T1.W, KC0[2].Y,
+; EG-NEXT: LDS_WRITE * T1.W, T0.W,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
-; EG-NEXT: LDS_SHORT_WRITE * T0.W, T0.X,
+; EG-NEXT: LDS_SHORT_WRITE * T0.W, T0.Y,
; EG-NEXT: RETURN
entry:
%ld = load <3 x i16>, ptr addrspace(3) %in
diff --git a/llvm/test/CodeGen/AMDGPU/min.ll b/llvm/test/CodeGen/AMDGPU/min.ll
index 721f9741ba418..311527d5d04cc 100644
--- a/llvm/test/CodeGen/AMDGPU/min.ll
+++ b/llvm/test/CodeGen/AMDGPU/min.ll
@@ -991,30 +991,81 @@ define amdgpu_kernel void @s_test_imin_sle_v2i16(ptr addrspace(1) %out, <2 x i16
define amdgpu_kernel void @s_test_imin_sle_v4i16(ptr addrspace(1) %out, <4 x i16> %a, <4 x i16> %b) #0 {
; EG-LABEL: s_test_imin_sle_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @14, KC0[], KC1[]
-; EG-NEXT: TEX 3 @6
-; EG-NEXT: ALU 9, @15, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; EG-NEXT: ALU 1, @28, KC0[], KC1[]
+; EG-NEXT: TEX 1 @12
+; EG-NEXT: ALU 9, @30, KC0[], KC1[]
+; EG-NEXT: TEX 1 @16
+; EG-NEXT: ALU 10, @40, KC0[], KC1[]
+; EG-NEXT: TEX 1 @20
+; EG-NEXT: ALU 10, @51, KC0[], KC1[]
+; EG-NEXT: TEX 1 @24
+; EG-NEXT: ALU 11, @62, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XY, T5.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
-; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_16 T1.X, T0.X, 46, #3
-; EG-NEXT: VTX_READ_16 T2.X, T0.X, 52, #3
-; EG-NEXT: VTX_READ_16 T3.X, T0.X, 44, #3
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 54, #3
-; EG-NEXT: ALU clause starting at 14:
-; EG-NEXT: MOV * T0.X, 0.0,
-; EG-NEXT: ALU clause starting at 15:
-; EG-NEXT: BFE_INT T0.Z, T1.X, 0.0, literal.x,
-; EG-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: Fetch clause starting at 12:
+; EG-NEXT: VTX_READ_16 T6.X, T5.X, 50, #3
+; EG-NEXT: VTX_READ_16 T7.X, T5.X, 58, #3
+; EG-NEXT: Fetch clause starting at 16:
+; EG-NEXT: VTX_READ_16 T6.X, T5.X, 48, #3
+; EG-NEXT: VTX_READ_16 T7.X, T5.X, 56, #3
+; EG-NEXT: Fetch clause starting at 20:
+; EG-NEXT: VTX_READ_16 T6.X, T5.X, 46, #3
+; EG-NEXT: VTX_READ_16 T7.X, T5.X, 54, #3
+; EG-NEXT: Fetch clause starting at 24:
+; EG-NEXT: VTX_READ_16 T6.X, T5.X, 44, #3
+; EG-NEXT: VTX_READ_16 T5.X, T5.X, 52, #3
+; EG-NEXT: ALU clause starting at 28:
+; EG-NEXT: MOV * T0.Y, T3.X,
+; EG-NEXT: MOV * T5.X, 0.0,
+; EG-NEXT: ALU clause starting at 30:
+; EG-NEXT: BFE_INT T0.Z, T6.X, 0.0, literal.x,
+; EG-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: MIN_INT T0.Y, PV.Z, PV.W,
-; EG-NEXT: BFE_INT T0.Z, T3.X, 0.0, literal.x,
-; EG-NEXT: BFE_INT * T0.W, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: MIN_INT * T0.W, PV.Z, PV.W,
+; EG-NEXT: LSHL T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T3.X, PV.W,
+; EG-NEXT: MOV * T0.Y, PV.X,
+; EG-NEXT: ALU clause starting at 40:
+; EG-NEXT: BFE_INT T0.Z, T6.X, 0.0, literal.x,
+; EG-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: MIN_INT T0.X, PV.Z, PV.W,
-; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MIN_INT T0.W, PV.Z, PV.W,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T3.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T2.X,
+; EG-NEXT: ALU clause starting at 51:
+; EG-NEXT: BFE_INT T0.Z, T6.X, 0.0, literal.x,
+; EG-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: MIN_INT T0.W, PV.Z, PV.W,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T2.X, PV.W,
+; EG-NEXT: MOV * T0.Y, PV.X,
+; EG-NEXT: ALU clause starting at 62:
+; EG-NEXT: BFE_INT T0.Z, T6.X, 0.0, literal.x,
+; EG-NEXT: BFE_INT * T0.W, T5.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: MIN_INT * T0.W, PV.Z, PV.W,
+; EG-NEXT: LSHR T5.X, KC0[2].Y, literal.x,
+; EG-NEXT: AND_INT T1.W, T0.Y, literal.y,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.z,
+; EG-NEXT: 2(2.802597e-45), -65536(nan)
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T6.X, PV.W, PS,
+; EG-NEXT: MOV T2.X, PV.X,
+; EG-NEXT: MOV * T6.Y, T3.X,
;
; CI-LABEL: s_test_imin_sle_v4i16:
; CI: ; %bb.0:
@@ -2154,40 +2205,49 @@ define amdgpu_kernel void @v_test_umin_ule_v3i32(ptr addrspace(1) %out, ptr addr
define amdgpu_kernel void @v_test_umin_ule_v3i16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
; EG-LABEL: v_test_umin_ule_v3i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 3, @14, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 3 @6
-; EG-NEXT: ALU 17, @18, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T2.X, 0
-; EG-NEXT: MEM_RAT MSKOR T4.XW, T0.X
+; EG-NEXT: ALU 3, @20, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @8
+; EG-NEXT: ALU 11, @24, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 3 @12
+; EG-NEXT: ALU 8, @36, KC0[], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T8.X, 0
+; EG-NEXT: MEM_RAT MSKOR T7.XW, T0.X
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_16 T2.X, T1.X, 0, #1
-; EG-NEXT: VTX_READ_16 T3.X, T0.X, 0, #1
-; EG-NEXT: VTX_READ_16 T1.X, T1.X, 4, #1
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 4, #1
-; EG-NEXT: ALU clause starting at 14:
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_16 T7.X, T6.X, 4, #1
+; EG-NEXT: VTX_READ_16 T8.X, T0.X, 4, #1
+; EG-NEXT: Fetch clause starting at 12:
+; EG-NEXT: VTX_READ_16 T8.X, T6.X, 0, #1
+; EG-NEXT: VTX_READ_16 T9.X, T0.X, 0, #1
+; EG-NEXT: VTX_READ_16 T6.X, T6.X, 2, #1
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 2, #1
+; EG-NEXT: ALU clause starting at 20:
; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: ADD_INT T0.X, KC0[2].Z, PV.W,
-; EG-NEXT: ADD_INT * T1.X, KC0[2].W, PV.W,
-; EG-NEXT: ALU clause starting at 18:
+; EG-NEXT: ADD_INT * T6.X, KC0[2].W, PV.W,
+; EG-NEXT: ALU clause starting at 24:
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W,
; EG-NEXT: ADD_INT * T1.W, PV.W, literal.x,
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT * T2.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T2.W, PV.W, literal.x,
-; EG-NEXT: MIN_UINT * T3.W, T0.X, T1.X,
+; EG-NEXT: MIN_UINT * T3.W, T8.X, T7.X,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
-; EG-NEXT: LSHL T4.X, PS, PV.W,
-; EG-NEXT: LSHL * T4.W, literal.x, PV.W,
+; EG-NEXT: LSHL T7.X, PS, PV.W,
+; EG-NEXT: LSHL * T7.W, literal.x, PV.W,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MOV T4.Y, 0.0,
-; EG-NEXT: MOV * T4.Z, 0.0,
+; EG-NEXT: MOV * T7.Y, 0.0,
+; EG-NEXT: ALU clause starting at 36:
+; EG-NEXT: MOV T7.Z, 0.0,
+; EG-NEXT: MIN_UINT * T2.W, T0.X, T6.X,
; EG-NEXT: LSHR T0.X, T1.W, literal.x,
-; EG-NEXT: MIN_UINT * T1.X, T3.X, T2.X,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: LSHR * T2.X, T0.W, literal.x,
+; EG-NEXT: LSHL T1.W, PV.W, literal.y,
+; EG-NEXT: MIN_UINT * T2.W, T9.X, T8.X,
+; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT: OR_INT T6.X, PV.W, PS,
+; EG-NEXT: LSHR * T8.X, T0.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CI-LABEL: v_test_umin_ule_v3i16:
@@ -3483,46 +3543,142 @@ define amdgpu_kernel void @s_test_umin_ult_v8i32(ptr addrspace(1) %out, <8 x i32
define amdgpu_kernel void @s_test_umin_ult_v8i16(ptr addrspace(1) %out, <8 x i16> %a, <8 x i16> %b) #0 {
; EG-LABEL: s_test_umin_ult_v8i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @24, KC0[], KC1[]
-; EG-NEXT: TEX 2 @8
-; EG-NEXT: ALU 2, @25, KC0[], KC1[]
-; EG-NEXT: TEX 4 @14
-; EG-NEXT: ALU 14, @28, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
+; EG-NEXT: ALU 1, @52, KC0[], KC1[]
+; EG-NEXT: TEX 1 @20
+; EG-NEXT: ALU 9, @54, KC0[], KC1[]
+; EG-NEXT: TEX 1 @24
+; EG-NEXT: ALU 8, @64, KC0[], KC1[]
+; EG-NEXT: TEX 1 @28
+; EG-NEXT: ALU 10, @73, KC0[], KC1[]
+; EG-NEXT: TEX 1 @32
+; EG-NEXT: ALU 8, @84, KC0[], KC1[]
+; EG-NEXT: TEX 1 @36
+; EG-NEXT: ALU 10, @93, KC0[], KC1[]
+; EG-NEXT: TEX 1 @40
+; EG-NEXT: ALU 8, @104, KC0[], KC1[]
+; EG-NEXT: TEX 1 @44
+; EG-NEXT: ALU 10, @113, KC0[], KC1[]
+; EG-NEXT: TEX 1 @48
+; EG-NEXT: ALU 10, @124, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
-; EG-NEXT: Fetch clause starting at 8:
-; EG-NEXT: VTX_READ_16 T1.X, T0.X, 62, #3
-; EG-NEXT: VTX_READ_16 T2.X, T0.X, 60, #3
-; EG-NEXT: VTX_READ_16 T3.X, T0.X, 78, #3
-; EG-NEXT: Fetch clause starting at 14:
-; EG-NEXT: VTX_READ_16 T1.X, T0.X, 68, #3
-; EG-NEXT: VTX_READ_16 T3.X, T0.X, 52, #3
-; EG-NEXT: VTX_READ_16 T4.X, T0.X, 70, #3
-; EG-NEXT: VTX_READ_16 T5.X, T0.X, 54, #3
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 76, #3
-; EG-NEXT: ALU clause starting at 24:
-; EG-NEXT: MOV * T0.X, 0.0,
-; EG-NEXT: ALU clause starting at 25:
-; EG-NEXT: AND_INT T0.W, T1.X, literal.x,
-; EG-NEXT: AND_INT * T1.W, T3.X, literal.x,
+; EG-NEXT: Fetch clause starting at 20:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 66, #3
+; EG-NEXT: VTX_READ_16 T9.X, T7.X, 82, #3
+; EG-NEXT: Fetch clause starting at 24:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 64, #3
+; EG-NEXT: VTX_READ_16 T9.X, T7.X, 80, #3
+; EG-NEXT: Fetch clause starting at 28:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 62, #3
+; EG-NEXT: VTX_READ_16 T9.X, T7.X, 78, #3
+; EG-NEXT: Fetch clause starting at 32:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 60, #3
+; EG-NEXT: VTX_READ_16 T9.X, T7.X, 76, #3
+; EG-NEXT: Fetch clause starting at 36:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 58, #3
+; EG-NEXT: VTX_READ_16 T9.X, T7.X, 74, #3
+; EG-NEXT: Fetch clause starting at 40:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 56, #3
+; EG-NEXT: VTX_READ_16 T9.X, T7.X, 72, #3
+; EG-NEXT: Fetch clause starting at 44:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 54, #3
+; EG-NEXT: VTX_READ_16 T9.X, T7.X, 70, #3
+; EG-NEXT: Fetch clause starting at 48:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 52, #3
+; EG-NEXT: VTX_READ_16 T7.X, T7.X, 68, #3
+; EG-NEXT: ALU clause starting at 52:
+; EG-NEXT: MOV * T0.Y, T3.X,
+; EG-NEXT: MOV * T7.X, 0.0,
+; EG-NEXT: ALU clause starting at 54:
+; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: ALU clause starting at 28:
-; EG-NEXT: AND_INT T0.Z, T2.X, literal.x,
-; EG-NEXT: AND_INT T2.W, T0.X, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: MIN_UINT * T0.W, T0.W, T1.W,
+; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
+; EG-NEXT: LSHL T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T3.X, PV.W,
+; EG-NEXT: MOV * T0.Y, PV.X,
+; EG-NEXT: ALU clause starting at 64:
+; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MIN_UINT T0.Z, PV.Z, PV.W,
-; EG-NEXT: AND_INT T1.W, T5.X, literal.x,
-; EG-NEXT: AND_INT * T2.W, T4.X, literal.x,
+; EG-NEXT: AND_INT T2.W, T0.Y, literal.x,
+; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T3.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T2.X,
+; EG-NEXT: ALU clause starting at 73:
+; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MIN_UINT T0.Y, PV.W, PS,
-; EG-NEXT: AND_INT T1.W, T3.X, literal.x,
-; EG-NEXT: AND_INT * T2.W, T1.X, literal.x,
+; EG-NEXT: MIN_UINT T0.W, PV.W, PS,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MIN_UINT T0.X, PV.W, PS,
-; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T2.X, PV.W,
+; EG-NEXT: MOV * T0.Y, PV.X,
+; EG-NEXT: ALU clause starting at 84:
+; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, T0.Y, literal.x,
+; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T7.Z, PV.W, PS,
+; EG-NEXT: MOV T2.X, PV.Z,
+; EG-NEXT: MOV * T0.Y, T5.X,
+; EG-NEXT: ALU clause starting at 93:
+; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: MIN_UINT T0.W, PV.W, PS,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV * T0.Y, PV.X,
+; EG-NEXT: ALU clause starting at 104:
+; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, T0.Y, literal.x,
+; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: ALU clause starting at 113:
+; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: MIN_UINT T0.W, PV.W, PS,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV * T0.Y, PV.X,
+; EG-NEXT: ALU clause starting at 124:
+; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T7.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
+; EG-NEXT: AND_INT T2.W, T0.Y, literal.y,
+; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), -65536(nan)
+; EG-NEXT: OR_INT * T7.X, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.X,
+; EG-NEXT: MOV * T7.W, T3.X,
+; EG-NEXT: MOV * T7.Y, T5.X,
;
; CI-LABEL: s_test_umin_ult_v8i16:
; CI: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll
index 7aa7342ec8706..28330bfc9bb69 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.ll
@@ -681,30 +681,63 @@ define amdgpu_kernel void @shl_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in
;
; EG-LABEL: shl_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 3, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 10, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T8.X, 1
+; EG-NEXT: ALU 42, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XY, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T8.XYZW, T0.X, 0, #1
+; EG-NEXT: VTX_READ_128 T10.XYZW, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT: MOV T0.Y, T6.X,
+; EG-NEXT: LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W,
-; EG-NEXT: ALU clause starting at 11:
-; EG-NEXT: LSHR T1.W, T8.Z, literal.x,
-; EG-NEXT: LSHR * T2.W, T8.X, literal.x,
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: AND_INT * T1.W, T10.Z, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T1.W, T10.X, PV.W,
+; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T2.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T1.W, PS, PV.W,
+; EG-NEXT: MOV * T6.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: LSHR T1.W, T10.Z, literal.x,
+; EG-NEXT: LSHR * T2.W, T10.X, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHL T0.Y, PS, PV.W,
-; EG-NEXT: AND_INT T1.W, T8.Z, literal.x,
-; EG-NEXT: AND_INT * T2.W, T8.X, literal.x,
+; EG-NEXT: LSHL T1.W, PS, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL T0.X, PS, PV.W,
+; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
+; EG-NEXT: MOV T6.X, PV.W,
+; EG-NEXT: MOV * T0.X, T7.X,
+; EG-NEXT: AND_INT * T1.W, T10.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL T1.W, T10.Y, PV.W,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
+; EG-NEXT: MOV * T7.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: LSHR T1.W, T10.W, literal.x,
+; EG-NEXT: LSHR * T2.W, T10.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: LSHL * T1.W, PS, PV.W,
+; EG-NEXT: AND_INT T0.Z, T0.X, literal.x,
+; EG-NEXT: LSHL T1.W, PV.W, literal.y,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W,
-; EG-NEXT: LSHR * T8.X, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: LSHR T0.X, PS, literal.x,
+; EG-NEXT: OR_INT * T10.Y, PV.Z, PV.W,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T7.X, PV.Y,
+; EG-NEXT: MOV * T10.X, T6.X,
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i32 %tid
diff --git a/llvm/test/CodeGen/AMDGPU/sra.ll b/llvm/test/CodeGen/AMDGPU/sra.ll
index 5d169c142b2e9..80c0d0f45eb97 100644
--- a/llvm/test/CodeGen/AMDGPU/sra.ll
+++ b/llvm/test/CodeGen/AMDGPU/sra.ll
@@ -320,28 +320,67 @@ define amdgpu_kernel void @ashr_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %i
;
; EG-LABEL: ashr_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XY, T8.X, 1
+; EG-NEXT: ALU 48, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XY, T9.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
+; EG-NEXT: VTX_READ_128 T9.XYZW, T9.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T7.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
-; EG-NEXT: LSHR T0.Z, T7.X, literal.x,
-; EG-NEXT: BFE_INT T0.W, T7.X, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, T7.Z, literal.y,
+; EG-NEXT: MOV * T0.Y, T6.X,
+; EG-NEXT: MOV * T9.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: BFE_INT T0.W, T9.X, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, T9.Z, literal.y,
; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
-; EG-NEXT: ASHR T7.X, PV.W, PS,
-; EG-NEXT: BFE_INT T0.W, PV.Z, 0.0, literal.x,
-; EG-NEXT: LSHR * T1.W, T7.Z, literal.x,
+; EG-NEXT: ASHR * T0.W, PV.W, PS,
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T6.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T9.X, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: LSHR * T1.W, T9.Z, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: ASHR T0.W, PV.W, PS,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T6.X, PV.W,
+; EG-NEXT: MOV T0.Y, T7.X,
+; EG-NEXT: BFE_INT T0.W, T9.Y, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, T9.W, literal.y,
+; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT: ASHR T0.W, PV.W, PS,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T7.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T9.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: LSHR * T1.W, T9.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: ASHR T0.W, PV.W, PS,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
-; EG-NEXT: ASHR * T7.Y, PV.W, PS,
+; EG-NEXT: LSHR T9.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T10.Y, T1.W, PV.W,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T7.X, PV.Y,
+; EG-NEXT: MOV * T10.X, T6.X,
%b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %in, i16 1
%a = load <4 x i16>, ptr addrspace(1) %in
%b = load <4 x i16>, ptr addrspace(1) %b_ptr
diff --git a/llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll
new file mode 100644
index 0000000000000..ac3cd84694f4c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mattr='+v' < %s | FileCheck %s
+
+define <2 x i8> @fp4(<4 x i4> %0) nounwind {
+; CHECK-LABEL: fp4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
+; CHECK-NEXT: vmv.x.s a1, v9
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
+; CHECK-NEXT: vslidedown.vi v8, v8, 3
+; CHECK-NEXT: andi a0, a0, 15
+; CHECK-NEXT: vmv.x.s a2, v9
+; CHECK-NEXT: andi a1, a1, 15
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: vmv.x.s a1, v8
+; CHECK-NEXT: andi a2, a2, 15
+; CHECK-NEXT: slli a1, a1, 12
+; CHECK-NEXT: slli a2, a2, 8
+; CHECK-NEXT: or a1, a2, a1
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: sh a0, 14(sp)
+; CHECK-NEXT: addi a0, sp, 14
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %2 = bitcast <4 x i4> %0 to <2 x i8>
+ ret <2 x i8> %2
+}
More information about the llvm-commits
mailing list