[llvm] cb798f0 - [DAG] computeKnownBits - Move (most) ISD::SRL handling into KnownBits::lshr

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 3 09:45:43 PST 2020


Author: Simon Pilgrim
Date: 2020-11-03T17:30:36Z
New Revision: cb798f040a768dbd1fc918fe581088b2fe7932bf

URL: https://github.com/llvm/llvm-project/commit/cb798f040a768dbd1fc918fe581088b2fe7932bf
DIFF: https://github.com/llvm/llvm-project/commit/cb798f040a768dbd1fc918fe581088b2fe7932bf.diff

LOG: [DAG] computeKnownBits - Move (most) ISD::SRL handling into KnownBits::lshr

As discussed on D90527, we should be trying to move shift handling functionality into KnownBits to avoid code duplication in SelectionDAG/GlobalISel/ValueTracking.

The refactor to use the KnownBits fixed/min/max constant helpers allows us to hit a couple of cases that we were missing before.

We still need the getValidMinimumShiftAmountConstant case as KnownBits doesn't handle per-element vector cases.

Added: 
    

Modified: 
    llvm/include/llvm/Support/KnownBits.h
    llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/lib/Support/KnownBits.cpp
    llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
    llvm/test/CodeGen/AMDGPU/load-global-i16.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h
index 81534bce631e..da8b099357c7 100644
--- a/llvm/include/llvm/Support/KnownBits.h
+++ b/llvm/include/llvm/Support/KnownBits.h
@@ -274,6 +274,10 @@ struct KnownBits {
   /// NOTE: RHS (shift amount) bitwidth doesn't need to be the same as LHS.
   static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS);
 
+  /// Compute known bits for lshr(LHS, RHS).
+  /// NOTE: RHS (shift amount) bitwidth doesn't need to be the same as LHS.
+  static KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS);
+
   /// Insert the bits from a smaller known bits starting at bitPosition.
   void insertBits(const KnownBits &SubBits, unsigned BitPosition) {
     Zero.insertBits(SubBits.Zero, BitPosition);

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 2a6ec9822afd..a1e1041e1098 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2970,19 +2970,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     break;
   case ISD::SRL:
     Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-
-    if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
-      unsigned Shift = ShAmt->getZExtValue();
-      Known.Zero.lshrInPlace(Shift);
-      Known.One.lshrInPlace(Shift);
-      // High bits are known zero.
-      Known.Zero.setHighBits(Shift);
-      break;
-    }
-
-    // No matter the shift amount, the leading zeros will stay zero.
-    Known.Zero = APInt::getHighBitsSet(BitWidth, Known.countMinLeadingZeros());
-    Known.One.clearAllBits();
+    Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    Known = KnownBits::lshr(Known, Known2);
 
     // Minimum shift high bits are known zero.
     if (const APInt *ShMinAmt =

diff  --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp
index 8f052feda2bb..c270b481c2c2 100644
--- a/llvm/lib/Support/KnownBits.cpp
+++ b/llvm/lib/Support/KnownBits.cpp
@@ -169,6 +169,29 @@ KnownBits KnownBits::shl(const KnownBits &LHS, const KnownBits &RHS) {
   return Known;
 }
 
+KnownBits KnownBits::lshr(const KnownBits &LHS, const KnownBits &RHS) {
+  unsigned BitWidth = LHS.getBitWidth();
+  KnownBits Known(BitWidth);
+
+  if (RHS.isConstant() && RHS.getConstant().ult(BitWidth)) {
+    unsigned Shift = RHS.getConstant().getZExtValue();
+    Known = LHS;
+    Known.Zero.lshrInPlace(Shift);
+    Known.One.lshrInPlace(Shift);
+    // High bits are known zero.
+    Known.Zero.setHighBits(Shift);
+    return Known;
+  }
+
+  // Minimum shift amount high bits are known zero.
+  if (RHS.getMinValue().ult(BitWidth))
+    Known.Zero.setHighBits(RHS.getMinValue().getZExtValue());
+
+  // No matter the shift amount, the leading zeros will stay zero.
+  Known.Zero.setHighBits(LHS.countMinLeadingZeros());
+  return Known;
+}
+
 KnownBits KnownBits::abs() const {
   // If the source's MSB is zero then we know the rest of the bits already.
   if (isNonNegative())

diff  --git a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
index 1df8598a0b1a..c08eeaedae59 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
@@ -931,7 +931,7 @@ define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 6, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
@@ -940,9 +940,7 @@ define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(
 ; EG-NEXT:    ALU clause starting at 8:
 ; EG-NEXT:     MOV * T4.X, KC0[2].Z,
 ; EG-NEXT:    ALU clause starting at 9:
-; EG-NEXT:     MOV * T0.W, literal.x,
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T4.Y, T4.X, literal.x, PV.W,
+; EG-NEXT:     LSHR * T4.Y, T4.X, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
 ; EG-NEXT:     AND_INT T4.X, T4.X, literal.x,
 ; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
@@ -1272,7 +1270,7 @@ define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 13, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 12, @9, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
@@ -1284,14 +1282,13 @@ define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(
 ; EG-NEXT:     MOV T2.X, T5.X,
 ; EG-NEXT:     MOV * T3.X, T5.Y,
 ; EG-NEXT:     MOV T0.Y, PV.X,
-; EG-NEXT:     MOV T0.Z, PS,
-; EG-NEXT:     MOV * T0.W, literal.x,
+; EG-NEXT:     MOV * T0.Z, PS,
+; EG-NEXT:     LSHR * T5.W, PV.Z, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T5.W, PV.Z, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T5.Z, T0.Z, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHR * T5.Y, T0.Y, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T5.Y, T0.Y, literal.x, T0.W,
-; EG-NEXT:     AND_INT * T5.Z, T0.Z, literal.y,
-; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
 ; EG-NEXT:     AND_INT T5.X, T0.Y, literal.x,
 ; EG-NEXT:     LSHR * T6.X, KC0[2].Y, literal.y,
 ; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
@@ -1501,7 +1498,7 @@ define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 19, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 17, @9, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
 ; EG-NEXT:    CF_END
@@ -1510,21 +1507,19 @@ define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(
 ; EG-NEXT:    ALU clause starting at 8:
 ; EG-NEXT:     MOV * T7.X, KC0[2].Z,
 ; EG-NEXT:    ALU clause starting at 9:
-; EG-NEXT:     MOV * T0.W, literal.x,
+; EG-NEXT:     LSHR * T8.W, T7.Y, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T8.W, T7.Y, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T8.Z, T7.Y, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHR T8.Y, T7.X, literal.x,
+; EG-NEXT:     LSHR * T9.W, T7.W, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T8.Y, T7.X, literal.x, T0.W,
-; EG-NEXT:     AND_INT T8.Z, T7.Y, literal.y,
-; EG-NEXT:     BFE_UINT * T9.W, T7.W, literal.x, T0.W,
-; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
 ; EG-NEXT:     AND_INT T8.X, T7.X, literal.x,
-; EG-NEXT:     BFE_UINT T9.Y, T7.Z, literal.y, T0.W,
-; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.z,
-; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT:     AND_INT * T9.Z, T7.W, literal.x,
-; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T9.Z, T7.W, literal.x,
+; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
+; EG-NEXT:     LSHR * T9.Y, T7.Z, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
 ; EG-NEXT:     AND_INT T9.X, T7.Z, literal.x,
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
 ; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
@@ -1835,51 +1830,50 @@ define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspa
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 1 @8
-; EG-NEXT:    ALU 36, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 35, @13, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T12.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T11.X, 0
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 1
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T12.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    Fetch clause starting at 8:
-; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
-; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
+; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 0, #1
+; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 16, #1
 ; EG-NEXT:    ALU clause starting at 12:
 ; EG-NEXT:     MOV * T11.X, KC0[2].Z,
 ; EG-NEXT:    ALU clause starting at 13:
-; EG-NEXT:     MOV * T0.W, literal.x,
+; EG-NEXT:     LSHR * T13.W, T12.Y, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T13.W, T11.Y, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T13.Z, T12.Y, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHR T13.Y, T12.X, literal.x,
+; EG-NEXT:     LSHR * T14.W, T12.W, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T13.Y, T11.X, literal.x, T0.W,
-; EG-NEXT:     AND_INT T13.Z, T11.Y, literal.y,
-; EG-NEXT:     BFE_UINT * T14.W, T11.W, literal.x, T0.W,
-; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
-; EG-NEXT:     AND_INT T13.X, T11.X, literal.x,
-; EG-NEXT:     BFE_UINT T14.Y, T11.Z, literal.y, T0.W,
-; EG-NEXT:     LSHR * T11.X, KC0[2].Y, literal.z,
-; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T14.Z, T11.W, literal.x,
-; EG-NEXT:     BFE_UINT * T15.W, T12.Y, literal.y, T0.W,
-; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT:     AND_INT T14.X, T11.Z, literal.x,
-; EG-NEXT:     BFE_UINT T15.Y, T12.X, literal.y, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT:     AND_INT T13.X, T12.X, literal.x,
+; EG-NEXT:     AND_INT T14.Z, T12.W, literal.x,
+; EG-NEXT:     LSHR * T12.X, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
+; EG-NEXT:     LSHR T14.Y, T12.Z, literal.x,
+; EG-NEXT:     LSHR * T15.W, T11.Y, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T14.X, T12.Z, literal.x,
+; EG-NEXT:     AND_INT T15.Z, T11.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
 ; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
 ; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T15.Z, T12.Y, literal.y,
-; EG-NEXT:     BFE_UINT T17.W, T12.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T15.X, T12.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T17.Y, T12.Z, literal.x, T0.W,
+; EG-NEXT:     LSHR T15.Y, T11.X, literal.y,
+; EG-NEXT:     LSHR T17.W, T11.W, literal.y,
+; EG-NEXT:     AND_INT * T15.X, T11.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T17.Z, T11.W, literal.x,
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 32(4.484155e-44)
-; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T17.Z, T12.W, literal.y,
-; EG-NEXT:     AND_INT * T17.X, T12.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
+; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
+; EG-NEXT:     LSHR T17.Y, T11.Z, literal.y,
+; EG-NEXT:     AND_INT * T17.X, T11.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
 ; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
 ; EG-NEXT:     LSHR * T18.X, PV.W, literal.x,
@@ -2403,93 +2397,92 @@ define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspa
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @20, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 3 @12
-; EG-NEXT:    ALU 72, @21, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 71, @21, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T34.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T20.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T32.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T21.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T22.X, 0
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T29.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T22.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T19.X, 0
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T26.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T19.X, 1
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T20.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    Fetch clause starting at 12:
-; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 48, #1
-; EG-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 32, #1
-; EG-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 16, #1
-; EG-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 0, #1
+; EG-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 48, #1
+; EG-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 32, #1
+; EG-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 16, #1
 ; EG-NEXT:    ALU clause starting at 20:
 ; EG-NEXT:     MOV * T19.X, KC0[2].Z,
 ; EG-NEXT:    ALU clause starting at 21:
-; EG-NEXT:     MOV * T0.W, literal.x,
+; EG-NEXT:     LSHR * T23.W, T20.Y, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T23.W, T19.Y, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T23.Z, T20.Y, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHR T23.Y, T20.X, literal.x,
+; EG-NEXT:     LSHR * T24.W, T20.W, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T23.Y, T19.X, literal.x, T0.W,
-; EG-NEXT:     AND_INT T23.Z, T19.Y, literal.y,
-; EG-NEXT:     BFE_UINT * T24.W, T19.W, literal.x, T0.W,
-; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
-; EG-NEXT:     AND_INT T23.X, T19.X, literal.x,
-; EG-NEXT:     BFE_UINT T24.Y, T19.Z, literal.y, T0.W,
-; EG-NEXT:     LSHR * T19.X, KC0[2].Y, literal.z,
-; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T24.Z, T19.W, literal.x,
-; EG-NEXT:     BFE_UINT * T25.W, T22.Y, literal.y, T0.W,
-; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT:     AND_INT T24.X, T19.Z, literal.x,
-; EG-NEXT:     BFE_UINT T25.Y, T22.X, literal.y, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT:     AND_INT T23.X, T20.X, literal.x,
+; EG-NEXT:     AND_INT T24.Z, T20.W, literal.x,
+; EG-NEXT:     LSHR * T20.X, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
+; EG-NEXT:     LSHR T24.Y, T20.Z, literal.x,
+; EG-NEXT:     LSHR * T25.W, T19.Y, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T24.X, T20.Z, literal.x,
+; EG-NEXT:     AND_INT T25.Z, T19.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
 ; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
 ; EG-NEXT:     LSHR T26.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T25.Z, T22.Y, literal.y,
-; EG-NEXT:     BFE_UINT T27.W, T22.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T25.X, T22.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T27.Y, T22.Z, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 32(4.484155e-44)
-; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T27.Z, T22.W, literal.y,
-; EG-NEXT:     BFE_UINT T28.W, T21.Y, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T27.X, T22.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T28.Y, T21.X, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 48(6.726233e-44)
+; EG-NEXT:     LSHR T25.Y, T19.X, literal.y,
+; EG-NEXT:     LSHR T27.W, T19.W, literal.y,
+; EG-NEXT:     AND_INT * T25.X, T19.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T27.Z, T19.W, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
+; EG-NEXT:     LSHR T19.X, PV.W, literal.x,
+; EG-NEXT:     LSHR T27.Y, T19.Z, literal.y,
+; EG-NEXT:     LSHR T28.W, T22.Y, literal.y,
+; EG-NEXT:     AND_INT * T27.X, T19.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T28.Z, T22.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
 ; EG-NEXT:     LSHR T29.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T28.Z, T21.Y, literal.y,
-; EG-NEXT:     BFE_UINT T30.W, T21.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T28.X, T21.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T30.Y, T21.Z, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 64(8.968310e-44)
-; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T30.Z, T21.W, literal.y,
-; EG-NEXT:     BFE_UINT T31.W, T20.Y, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T30.X, T21.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T31.Y, T20.X, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 80(1.121039e-43)
+; EG-NEXT:     LSHR T28.Y, T22.X, literal.y,
+; EG-NEXT:     LSHR T30.W, T22.W, literal.y,
+; EG-NEXT:     AND_INT * T28.X, T22.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T30.Z, T22.W, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 64(8.968310e-44)
+; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
+; EG-NEXT:     LSHR T30.Y, T22.Z, literal.y,
+; EG-NEXT:     LSHR T31.W, T21.Y, literal.y,
+; EG-NEXT:     AND_INT * T30.X, T22.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T31.Z, T21.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 80(1.121039e-43)
 ; EG-NEXT:     LSHR T32.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T31.Z, T20.Y, literal.y,
-; EG-NEXT:     BFE_UINT T33.W, T20.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T31.X, T20.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T33.Y, T20.Z, literal.x, T0.W,
+; EG-NEXT:     LSHR T31.Y, T21.X, literal.y,
+; EG-NEXT:     LSHR T33.W, T21.W, literal.y,
+; EG-NEXT:     AND_INT * T31.X, T21.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T33.Z, T21.W, literal.x,
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 96(1.345247e-43)
-; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T33.Z, T20.W, literal.y,
-; EG-NEXT:     AND_INT * T33.X, T20.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT:    65535(9.183409e-41), 96(1.345247e-43)
+; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
+; EG-NEXT:     LSHR T33.Y, T21.Z, literal.y,
+; EG-NEXT:     AND_INT * T33.X, T21.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
 ; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
 ; EG-NEXT:     LSHR * T34.X, PV.W, literal.x,
@@ -3454,9 +3447,9 @@ define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspa
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @38, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 3 @22
-; EG-NEXT:    ALU 56, @39, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 55, @39, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 3 @30
-; EG-NEXT:    ALU 87, @96, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 87, @95, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T66.X, 0
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T49.X, 0
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T64.X, 0
@@ -3466,19 +3459,19 @@ define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspa
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T56.XYZW, T58.X, 0
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T52.X, 0
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T53.XYZW, T55.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T36.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T37.X, 0
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T48.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T37.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T38.X, 0
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T46.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T38.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T39.X, 0
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T43.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T39.X, 1
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T36.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    Fetch clause starting at 22:
-; EG-NEXT:     VTX_READ_128 T36.XYZW, T35.X, 48, #1
-; EG-NEXT:     VTX_READ_128 T37.XYZW, T35.X, 32, #1
-; EG-NEXT:     VTX_READ_128 T38.XYZW, T35.X, 16, #1
-; EG-NEXT:     VTX_READ_128 T39.XYZW, T35.X, 0, #1
+; EG-NEXT:     VTX_READ_128 T36.XYZW, T35.X, 0, #1
+; EG-NEXT:     VTX_READ_128 T37.XYZW, T35.X, 48, #1
+; EG-NEXT:     VTX_READ_128 T38.XYZW, T35.X, 32, #1
+; EG-NEXT:     VTX_READ_128 T39.XYZW, T35.X, 16, #1
 ; EG-NEXT:    Fetch clause starting at 30:
 ; EG-NEXT:     VTX_READ_128 T49.XYZW, T35.X, 112, #1
 ; EG-NEXT:     VTX_READ_128 T50.XYZW, T35.X, 96, #1
@@ -3487,148 +3480,147 @@ define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspa
 ; EG-NEXT:    ALU clause starting at 38:
 ; EG-NEXT:     MOV * T35.X, KC0[2].Z,
 ; EG-NEXT:    ALU clause starting at 39:
-; EG-NEXT:     MOV * T0.W, literal.x,
+; EG-NEXT:     LSHR * T40.W, T36.Y, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T40.W, T39.Y, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T40.Z, T36.Y, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHR T40.Y, T36.X, literal.x,
+; EG-NEXT:     LSHR * T41.W, T36.W, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T40.Y, T39.X, literal.x, T0.W,
-; EG-NEXT:     AND_INT T40.Z, T39.Y, literal.y,
-; EG-NEXT:     BFE_UINT * T41.W, T39.W, literal.x, T0.W,
-; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
-; EG-NEXT:     AND_INT T40.X, T39.X, literal.x,
-; EG-NEXT:     BFE_UINT T41.Y, T39.Z, literal.y, T0.W,
-; EG-NEXT:     LSHR * T39.X, KC0[2].Y, literal.z,
-; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T41.Z, T39.W, literal.x,
-; EG-NEXT:     BFE_UINT * T42.W, T38.Y, literal.y, T0.W,
-; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT:     AND_INT T41.X, T39.Z, literal.x,
-; EG-NEXT:     BFE_UINT T42.Y, T38.X, literal.y, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT:     AND_INT T40.X, T36.X, literal.x,
+; EG-NEXT:     AND_INT T41.Z, T36.W, literal.x,
+; EG-NEXT:     LSHR * T36.X, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
+; EG-NEXT:     LSHR T41.Y, T36.Z, literal.x,
+; EG-NEXT:     LSHR * T42.W, T39.Y, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T41.X, T36.Z, literal.x,
+; EG-NEXT:     AND_INT T42.Z, T39.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
 ; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
 ; EG-NEXT:     LSHR T43.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T42.Z, T38.Y, literal.y,
-; EG-NEXT:     BFE_UINT T44.W, T38.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T42.X, T38.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T44.Y, T38.Z, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 32(4.484155e-44)
-; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T44.Z, T38.W, literal.y,
-; EG-NEXT:     BFE_UINT T45.W, T37.Y, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T44.X, T38.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T45.Y, T37.X, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 48(6.726233e-44)
+; EG-NEXT:     LSHR T42.Y, T39.X, literal.y,
+; EG-NEXT:     LSHR T44.W, T39.W, literal.y,
+; EG-NEXT:     AND_INT * T42.X, T39.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T44.Z, T39.W, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
+; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
+; EG-NEXT:     LSHR T44.Y, T39.Z, literal.y,
+; EG-NEXT:     LSHR T45.W, T38.Y, literal.y,
+; EG-NEXT:     AND_INT * T44.X, T39.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T45.Z, T38.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
 ; EG-NEXT:     LSHR T46.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T45.Z, T37.Y, literal.y,
-; EG-NEXT:     BFE_UINT T47.W, T37.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T45.X, T37.X, literal.y,
+; EG-NEXT:     LSHR T45.Y, T38.X, literal.y,
+; EG-NEXT:     LSHR T47.W, T38.W, literal.y,
+; EG-NEXT:     AND_INT * T45.X, T38.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T47.Z, T38.W, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 64(8.968310e-44)
+; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
+; EG-NEXT:     LSHR T47.Y, T38.Z, literal.y,
+; EG-NEXT:     AND_INT * T47.X, T38.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.x,
+; EG-NEXT:     LSHR * T35.W, T37.Y, literal.y,
+; EG-NEXT:    80(1.121039e-43), 16(2.242078e-44)
+; EG-NEXT:     LSHR T48.X, PV.W, literal.x,
+; EG-NEXT:     AND_INT * T35.Z, T37.Y, literal.y,
 ; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT:    ALU clause starting at 95:
+; EG-NEXT:     LSHR T35.Y, T37.X, literal.x,
+; EG-NEXT:     LSHR * T53.W, T37.W, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T47.Y, T37.Z, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 64(8.968310e-44)
+; EG-NEXT:     AND_INT T35.X, T37.X, literal.x,
+; EG-NEXT:     AND_INT T53.Z, T37.W, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 96(1.345247e-43)
 ; EG-NEXT:     LSHR T37.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T47.Z, T37.W, literal.y,
-; EG-NEXT:     AND_INT * T47.X, T37.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:     BFE_UINT T35.W, T36.Y, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 80(1.121039e-43)
-; EG-NEXT:     LSHR T48.X, PS, literal.x,
-; EG-NEXT:     BFE_UINT * T35.Y, T36.X, literal.y, T0.W,
+; EG-NEXT:     LSHR T53.Y, T37.Z, literal.y,
+; EG-NEXT:     LSHR T54.W, T52.Y, literal.y,
+; EG-NEXT:     AND_INT * T53.X, T37.Z, literal.z,
 ; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
-; EG-NEXT:    ALU clause starting at 96:
-; EG-NEXT:     AND_INT T35.Z, T36.Y, literal.x,
-; EG-NEXT:     BFE_UINT * T53.W, T36.W, literal.y, T0.W,
-; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT:     AND_INT T35.X, T36.X, literal.x,
-; EG-NEXT:     BFE_UINT T53.Y, T36.Z, literal.y, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT:    96(1.345247e-43), 0(0.000000e+00)
-; EG-NEXT:     LSHR T36.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T53.Z, T36.W, literal.y,
-; EG-NEXT:     BFE_UINT T54.W, T52.Y, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T53.X, T36.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T54.Y, T52.X, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 112(1.569454e-43)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T54.Z, T52.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 112(1.569454e-43)
 ; EG-NEXT:     LSHR T55.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T54.Z, T52.Y, literal.y,
-; EG-NEXT:     BFE_UINT T56.W, T52.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T54.X, T52.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T56.Y, T52.Z, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 128(1.793662e-43)
+; EG-NEXT:     LSHR T54.Y, T52.X, literal.y,
+; EG-NEXT:     LSHR T56.W, T52.W, literal.y,
+; EG-NEXT:     AND_INT * T54.X, T52.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T56.Z, T52.W, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 128(1.793662e-43)
 ; EG-NEXT:     LSHR T52.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T56.Z, T52.W, literal.y,
-; EG-NEXT:     BFE_UINT T57.W, T51.Y, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T56.X, T52.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T57.Y, T51.X, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 144(2.017870e-43)
+; EG-NEXT:     LSHR T56.Y, T52.Z, literal.y,
+; EG-NEXT:     LSHR T57.W, T51.Y, literal.y,
+; EG-NEXT:     AND_INT * T56.X, T52.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T57.Z, T51.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 144(2.017870e-43)
 ; EG-NEXT:     LSHR T58.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T57.Z, T51.Y, literal.y,
-; EG-NEXT:     BFE_UINT T59.W, T51.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T57.X, T51.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T59.Y, T51.Z, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 160(2.242078e-43)
+; EG-NEXT:     LSHR T57.Y, T51.X, literal.y,
+; EG-NEXT:     LSHR T59.W, T51.W, literal.y,
+; EG-NEXT:     AND_INT * T57.X, T51.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T59.Z, T51.W, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 160(2.242078e-43)
 ; EG-NEXT:     LSHR T51.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T59.Z, T51.W, literal.y,
-; EG-NEXT:     BFE_UINT T60.W, T50.Y, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T59.X, T51.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T60.Y, T50.X, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 176(2.466285e-43)
+; EG-NEXT:     LSHR T59.Y, T51.Z, literal.y,
+; EG-NEXT:     LSHR T60.W, T50.Y, literal.y,
+; EG-NEXT:     AND_INT * T59.X, T51.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T60.Z, T50.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 176(2.466285e-43)
 ; EG-NEXT:     LSHR T61.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T60.Z, T50.Y, literal.y,
-; EG-NEXT:     BFE_UINT T62.W, T50.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T60.X, T50.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T62.Y, T50.Z, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 192(2.690493e-43)
+; EG-NEXT:     LSHR T60.Y, T50.X, literal.y,
+; EG-NEXT:     LSHR T62.W, T50.W, literal.y,
+; EG-NEXT:     AND_INT * T60.X, T50.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T62.Z, T50.W, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 192(2.690493e-43)
 ; EG-NEXT:     LSHR T50.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T62.Z, T50.W, literal.y,
-; EG-NEXT:     BFE_UINT T63.W, T49.Y, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T62.X, T50.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T63.Y, T49.X, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 208(2.914701e-43)
+; EG-NEXT:     LSHR T62.Y, T50.Z, literal.y,
+; EG-NEXT:     LSHR T63.W, T49.Y, literal.y,
+; EG-NEXT:     AND_INT * T62.X, T50.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T63.Z, T49.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 208(2.914701e-43)
 ; EG-NEXT:     LSHR T64.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T63.Z, T49.Y, literal.y,
-; EG-NEXT:     BFE_UINT T65.W, T49.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T63.X, T49.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T65.Y, T49.Z, literal.x, T0.W,
+; EG-NEXT:     LSHR T63.Y, T49.X, literal.y,
+; EG-NEXT:     LSHR T65.W, T49.W, literal.y,
+; EG-NEXT:     AND_INT * T63.X, T49.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T65.Z, T49.W, literal.x,
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 224(3.138909e-43)
+; EG-NEXT:    65535(9.183409e-41), 224(3.138909e-43)
 ; EG-NEXT:     LSHR T49.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T65.Z, T49.W, literal.y,
-; EG-NEXT:     AND_INT * T65.X, T49.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT:     LSHR T65.Y, T49.Z, literal.y,
+; EG-NEXT:     AND_INT * T65.X, T49.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
 ; EG-NEXT:    240(3.363116e-43), 0(0.000000e+00)
 ; EG-NEXT:     LSHR * T66.X, PV.W, literal.x,

diff  --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
index 3747245ab3ac..5d10633f4d83 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
@@ -1195,7 +1195,7 @@ define amdgpu_kernel void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 6, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
@@ -1204,9 +1204,7 @@ define amdgpu_kernel void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)
 ; EG-NEXT:    ALU clause starting at 8:
 ; EG-NEXT:     MOV * T4.X, KC0[2].Z,
 ; EG-NEXT:    ALU clause starting at 9:
-; EG-NEXT:     MOV * T0.W, literal.x,
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T4.Y, T4.X, literal.x, PV.W,
+; EG-NEXT:     LSHR * T4.Y, T4.X, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
 ; EG-NEXT:     AND_INT T4.X, T4.X, literal.x,
 ; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
@@ -1216,7 +1214,7 @@ define amdgpu_kernel void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)
 ; CM:       ; %bb.0:
 ; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    TEX 0 @6
-; CM-NEXT:    ALU 7, @9, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 5, @9, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T4, T5.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    PAD
@@ -1225,9 +1223,7 @@ define amdgpu_kernel void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)
 ; CM-NEXT:    ALU clause starting at 8:
 ; CM-NEXT:     MOV * T4.X, KC0[2].Z,
 ; CM-NEXT:    ALU clause starting at 9:
-; CM-NEXT:     MOV * T0.W, literal.x,
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_UINT * T4.Y, T4.X, literal.x, PV.W,
+; CM-NEXT:     LSHR * T4.Y, T4.X, literal.x,
 ; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
 ; CM-NEXT:     AND_INT * T4.X, T4.X, literal.x,
 ; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
@@ -1636,7 +1632,7 @@ define amdgpu_kernel void @global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 13, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 12, @9, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
@@ -1648,14 +1644,13 @@ define amdgpu_kernel void @global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)
 ; EG-NEXT:     MOV T2.X, T5.X,
 ; EG-NEXT:     MOV * T3.X, T5.Y,
 ; EG-NEXT:     MOV T0.Y, PV.X,
-; EG-NEXT:     MOV T0.Z, PS,
-; EG-NEXT:     MOV * T0.W, literal.x,
+; EG-NEXT:     MOV * T0.Z, PS,
+; EG-NEXT:     LSHR * T5.W, PV.Z, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T5.W, PV.Z, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T5.Z, T0.Z, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHR * T5.Y, T0.Y, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T5.Y, T0.Y, literal.x, T0.W,
-; EG-NEXT:     AND_INT * T5.Z, T0.Z, literal.y,
-; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
 ; EG-NEXT:     AND_INT T5.X, T0.Y, literal.x,
 ; EG-NEXT:     LSHR * T6.X, KC0[2].Y, literal.y,
 ; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
@@ -1664,7 +1659,7 @@ define amdgpu_kernel void @global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)
 ; CM:       ; %bb.0:
 ; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    TEX 0 @6
-; CM-NEXT:    ALU 14, @9, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 13, @9, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T5, T6.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    PAD
@@ -1676,14 +1671,13 @@ define amdgpu_kernel void @global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)
 ; CM-NEXT:     MOV * T2.X, T5.X,
 ; CM-NEXT:     MOV T3.X, T5.Y,
 ; CM-NEXT:     MOV * T0.Y, PV.X,
-; CM-NEXT:     MOV T0.Z, PV.X,
-; CM-NEXT:     MOV * T0.W, literal.x,
+; CM-NEXT:     MOV * T0.Z, PV.X,
+; CM-NEXT:     LSHR * T5.W, PV.Z, literal.x,
 ; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_UINT * T5.W, PV.Z, literal.x, PV.W,
+; CM-NEXT:     AND_INT * T5.Z, T0.Z, literal.x,
+; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; CM-NEXT:     LSHR * T5.Y, T0.Y, literal.x,
 ; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_UINT T5.Y, T0.Y, literal.x, T0.W,
-; CM-NEXT:     AND_INT * T5.Z, T0.Z, literal.y,
-; CM-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
 ; CM-NEXT:     AND_INT * T5.X, T0.Y, literal.x,
 ; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
 ; CM-NEXT:     LSHR * T6.X, KC0[2].Y, literal.x,
@@ -1908,7 +1902,7 @@ define amdgpu_kernel void @global_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 0 @6
-; EG-NEXT:    ALU 19, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 17, @9, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
 ; EG-NEXT:    CF_END
@@ -1917,21 +1911,19 @@ define amdgpu_kernel void @global_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)
 ; EG-NEXT:    ALU clause starting at 8:
 ; EG-NEXT:     MOV * T7.X, KC0[2].Z,
 ; EG-NEXT:    ALU clause starting at 9:
-; EG-NEXT:     MOV * T0.W, literal.x,
+; EG-NEXT:     LSHR * T8.W, T7.Y, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T8.W, T7.Y, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T8.Z, T7.Y, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHR T8.Y, T7.X, literal.x,
+; EG-NEXT:     LSHR * T9.W, T7.W, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T8.Y, T7.X, literal.x, T0.W,
-; EG-NEXT:     AND_INT T8.Z, T7.Y, literal.y,
-; EG-NEXT:     BFE_UINT * T9.W, T7.W, literal.x, T0.W,
-; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
 ; EG-NEXT:     AND_INT T8.X, T7.X, literal.x,
-; EG-NEXT:     BFE_UINT T9.Y, T7.Z, literal.y, T0.W,
-; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.z,
-; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT:     AND_INT * T9.Z, T7.W, literal.x,
-; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T9.Z, T7.W, literal.x,
+; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
+; EG-NEXT:     LSHR * T9.Y, T7.Z, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
 ; EG-NEXT:     AND_INT T9.X, T7.Z, literal.x,
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
 ; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
@@ -1942,33 +1934,32 @@ define amdgpu_kernel void @global_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)
 ; CM:       ; %bb.0:
 ; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    TEX 0 @6
-; CM-NEXT:    ALU 18, @9, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T9, T7.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T8, T10.X
+; CM-NEXT:    ALU 17, @9, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T7, T10.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T8, T9.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    Fetch clause starting at 6:
 ; CM-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
 ; CM-NEXT:    ALU clause starting at 8:
 ; CM-NEXT:     MOV * T7.X, KC0[2].Z,
 ; CM-NEXT:    ALU clause starting at 9:
-; CM-NEXT:     MOV * T0.W, literal.x,
+; CM-NEXT:     LSHR * T8.W, T7.W, literal.x,
 ; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_UINT * T8.W, T7.W, literal.x, PV.W,
+; CM-NEXT:     AND_INT * T8.Z, T7.W, literal.x,
+; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; CM-NEXT:     LSHR T8.Y, T7.Z, literal.x,
+; CM-NEXT:     LSHR * T7.W, T7.Y, literal.x,
 ; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_UINT T8.Y, T7.Z, literal.x, T0.W,
-; CM-NEXT:     AND_INT T8.Z, T7.W, literal.y,
-; CM-NEXT:     BFE_UINT * T9.W, T7.Y, literal.x, T0.W,
-; CM-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
 ; CM-NEXT:     AND_INT T8.X, T7.Z, literal.x,
-; CM-NEXT:     BFE_UINT T9.Y, T7.X, literal.y, T0.W,
+; CM-NEXT:     AND_INT T7.Z, T7.Y, literal.x,
 ; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
 ; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:     LSHR T10.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT * T9.Z, T7.Y, literal.y,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:     AND_INT * T9.X, T7.X, literal.x,
+; CM-NEXT:     LSHR T9.X, PV.W, literal.x,
+; CM-NEXT:     LSHR * T7.Y, T7.X, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; CM-NEXT:     AND_INT * T7.X, T7.X, literal.x,
 ; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
-; CM-NEXT:     LSHR * T7.X, KC0[2].Y, literal.x,
+; CM-NEXT:     LSHR * T10.X, KC0[2].Y, literal.x,
 ; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
   %ext = zext <8 x i16> %load to <8 x i32>
@@ -2263,51 +2254,50 @@ define amdgpu_kernel void @global_zextload_v16i16_to_v16i32(<16 x i32> addrspace
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 1 @8
-; EG-NEXT:    ALU 36, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 35, @13, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T12.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T11.X, 0
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 1
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T12.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    Fetch clause starting at 8:
-; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
-; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
+; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 0, #1
+; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 16, #1
 ; EG-NEXT:    ALU clause starting at 12:
 ; EG-NEXT:     MOV * T11.X, KC0[2].Z,
 ; EG-NEXT:    ALU clause starting at 13:
-; EG-NEXT:     MOV * T0.W, literal.x,
+; EG-NEXT:     LSHR * T13.W, T12.Y, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T13.W, T11.Y, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T13.Z, T12.Y, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHR T13.Y, T12.X, literal.x,
+; EG-NEXT:     LSHR * T14.W, T12.W, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T13.Y, T11.X, literal.x, T0.W,
-; EG-NEXT:     AND_INT T13.Z, T11.Y, literal.y,
-; EG-NEXT:     BFE_UINT * T14.W, T11.W, literal.x, T0.W,
-; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
-; EG-NEXT:     AND_INT T13.X, T11.X, literal.x,
-; EG-NEXT:     BFE_UINT T14.Y, T11.Z, literal.y, T0.W,
-; EG-NEXT:     LSHR * T11.X, KC0[2].Y, literal.z,
-; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT:     AND_INT T14.Z, T11.W, literal.x,
-; EG-NEXT:     BFE_UINT * T15.W, T12.Y, literal.y, T0.W,
-; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT:     AND_INT T14.X, T11.Z, literal.x,
-; EG-NEXT:     BFE_UINT T15.Y, T12.X, literal.y, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT:     AND_INT T13.X, T12.X, literal.x,
+; EG-NEXT:     AND_INT T14.Z, T12.W, literal.x,
+; EG-NEXT:     LSHR * T12.X, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
+; EG-NEXT:     LSHR T14.Y, T12.Z, literal.x,
+; EG-NEXT:     LSHR * T15.W, T11.Y, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T14.X, T12.Z, literal.x,
+; EG-NEXT:     AND_INT T15.Z, T11.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
 ; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
 ; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T15.Z, T12.Y, literal.y,
-; EG-NEXT:     BFE_UINT T17.W, T12.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T15.X, T12.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T17.Y, T12.Z, literal.x, T0.W,
+; EG-NEXT:     LSHR T15.Y, T11.X, literal.y,
+; EG-NEXT:     LSHR T17.W, T11.W, literal.y,
+; EG-NEXT:     AND_INT * T15.X, T11.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T17.Z, T11.W, literal.x,
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 32(4.484155e-44)
-; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T17.Z, T12.W, literal.y,
-; EG-NEXT:     AND_INT * T17.X, T12.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
+; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
+; EG-NEXT:     LSHR T17.Y, T11.Z, literal.y,
+; EG-NEXT:     AND_INT * T17.X, T11.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
 ; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
 ; EG-NEXT:     LSHR * T18.X, PV.W, literal.x,
@@ -2317,56 +2307,51 @@ define amdgpu_kernel void @global_zextload_v16i16_to_v16i32(<16 x i32> addrspace
 ; CM:       ; %bb.0:
 ; CM-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    TEX 1 @8
-; CM-NEXT:    ALU 38, @13, KC0[CB0:0-32], KC1[]
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T17, T12.X
+; CM-NEXT:    ALU 33, @13, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T11, T18.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T14, T16.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T13, T15.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T15, T17.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T12, T16.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T13, T14.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    Fetch clause starting at 8:
-; CM-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 0, #1
-; CM-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 16, #1
+; CM-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
+; CM-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
 ; CM-NEXT:    ALU clause starting at 12:
 ; CM-NEXT:     MOV * T11.X, KC0[2].Z,
 ; CM-NEXT:    ALU clause starting at 13:
-; CM-NEXT:     MOV * T0.W, literal.x,
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_UINT * T13.W, T11.W, literal.x, PV.W,
+; CM-NEXT:     LSHR * T13.W, T12.W, literal.x,
 ; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_UINT T13.Y, T11.Z, literal.x, T0.W,
-; CM-NEXT:     AND_INT T13.Z, T11.W, literal.y,
-; CM-NEXT:     BFE_UINT * T14.W, T11.Y, literal.x, T0.W,
-; CM-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
-; CM-NEXT:     AND_INT T13.X, T11.Z, literal.x,
-; CM-NEXT:     BFE_UINT T14.Y, T11.X, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    48(6.726233e-44), 0(0.000000e+00)
-; CM-NEXT:     LSHR T15.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T14.Z, T11.Y, literal.y,
-; CM-NEXT:     BFE_UINT * T11.W, T12.W, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; CM-NEXT:     AND_INT * T13.Z, T12.W, literal.x,
+; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; CM-NEXT:     LSHR T13.Y, T12.Z, literal.x,
+; CM-NEXT:     LSHR * T12.W, T12.Y, literal.x,
 ; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     AND_INT T14.X, T11.X, literal.x,
-; CM-NEXT:     BFE_UINT T11.Y, T12.Z, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T13.X, T12.Z, literal.x,
+; CM-NEXT:     AND_INT T12.Z, T12.Y, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
+; CM-NEXT:     LSHR T14.X, PV.W, literal.x,
+; CM-NEXT:     LSHR T12.Y, T12.X, literal.y,
+; CM-NEXT:     LSHR * T15.W, T11.W, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; CM-NEXT:     AND_INT T12.X, T12.X, literal.x,
+; CM-NEXT:     AND_INT T15.Z, T11.W, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
 ; CM-NEXT:     LSHR T16.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T11.Z, T12.W, literal.y,
-; CM-NEXT:     BFE_UINT * T17.W, T12.Y, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     AND_INT T11.X, T12.Z, literal.x,
-; CM-NEXT:     BFE_UINT T17.Y, T12.X, literal.y, T0.W,
+; CM-NEXT:     LSHR T15.Y, T11.Z, literal.y,
+; CM-NEXT:     LSHR * T11.W, T11.Y, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; CM-NEXT:     AND_INT T15.X, T11.Z, literal.x,
+; CM-NEXT:     AND_INT T11.Z, T11.Y, literal.x,
 ; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
 ; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:     LSHR T18.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT * T17.Z, T12.Y, literal.y,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:     AND_INT * T17.X, T12.X, literal.x,
+; CM-NEXT:     LSHR T17.X, PV.W, literal.x,
+; CM-NEXT:     LSHR * T11.Y, T11.X, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; CM-NEXT:     AND_INT * T11.X, T11.X, literal.x,
 ; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
-; CM-NEXT:     LSHR * T12.X, KC0[2].Y, literal.x,
+; CM-NEXT:     LSHR * T18.X, KC0[2].Y, literal.x,
 ; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
   %ext = zext <16 x i16> %load to <16 x i32>
@@ -2851,194 +2836,181 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i32(<32 x i32> addrspace
 ; EG-NEXT:    ALU 0, @20, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 3 @12
 ; EG-NEXT:    ALU 72, @21, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T20.X, 0
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T34.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T32.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T31.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T29.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T28.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T26.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T25.X, 1
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T33.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T32.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T30.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T29.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T27.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T26.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T24.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    Fetch clause starting at 12:
-; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 48, #1
-; EG-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 32, #1
-; EG-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 16, #1
-; EG-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 0, #1
+; EG-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 48, #1
+; EG-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 32, #1
+; EG-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 16, #1
 ; EG-NEXT:    ALU clause starting at 20:
 ; EG-NEXT:     MOV * T19.X, KC0[2].Z,
 ; EG-NEXT:    ALU clause starting at 21:
-; EG-NEXT:     MOV * T0.W, literal.x,
+; EG-NEXT:     LSHR * T23.W, T20.W, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T23.W, T19.W, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T23.Z, T20.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHR T23.Y, T20.Z, literal.x,
+; EG-NEXT:     LSHR * T20.W, T20.Y, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T23.Y, T19.Z, literal.x, T0.W,
-; EG-NEXT:     AND_INT T23.Z, T19.W, literal.y,
-; EG-NEXT:     BFE_UINT * T24.W, T19.Y, literal.x, T0.W,
-; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
-; EG-NEXT:     AND_INT T23.X, T19.Z, literal.x,
-; EG-NEXT:     BFE_UINT T24.Y, T19.X, literal.y, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT:     AND_INT T23.X, T20.Z, literal.x,
+; EG-NEXT:     AND_INT T20.Z, T20.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
 ; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT:     LSHR T25.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T24.Z, T19.Y, literal.y,
-; EG-NEXT:     BFE_UINT T19.W, T22.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T24.X, T19.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T19.Y, T22.Z, literal.x, T0.W,
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
+; EG-NEXT:     LSHR T20.Y, T20.X, literal.y,
+; EG-NEXT:     LSHR T25.W, T19.W, literal.y,
+; EG-NEXT:     AND_INT * T20.X, T20.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T25.Z, T19.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
 ; EG-NEXT:     LSHR T26.X, KC0[2].Y, literal.x,
-; EG-NEXT:     AND_INT T19.Z, T22.W, literal.y,
-; EG-NEXT:     BFE_UINT T27.W, T22.Y, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T19.X, T22.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T27.Y, T22.X, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 48(6.726233e-44)
-; EG-NEXT:     LSHR T28.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T27.Z, T22.Y, literal.y,
-; EG-NEXT:     BFE_UINT T22.W, T21.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T27.X, T22.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T22.Y, T21.Z, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 32(4.484155e-44)
+; EG-NEXT:     LSHR T25.Y, T19.Z, literal.y,
+; EG-NEXT:     LSHR T19.W, T19.Y, literal.y,
+; EG-NEXT:     AND_INT * T25.X, T19.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T19.Z, T19.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
+; EG-NEXT:     LSHR T27.X, PV.W, literal.x,
+; EG-NEXT:     LSHR T19.Y, T19.X, literal.y,
+; EG-NEXT:     LSHR T28.W, T22.W, literal.y,
+; EG-NEXT:     AND_INT * T19.X, T19.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T28.Z, T22.W, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
 ; EG-NEXT:     LSHR T29.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T22.Z, T21.W, literal.y,
-; EG-NEXT:     BFE_UINT T30.W, T21.Y, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T22.X, T21.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T30.Y, T21.X, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 80(1.121039e-43)
-; EG-NEXT:     LSHR T31.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T30.Z, T21.Y, literal.y,
-; EG-NEXT:     BFE_UINT T21.W, T20.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T30.X, T21.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T21.Y, T20.Z, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 64(8.968310e-44)
+; EG-NEXT:     LSHR T28.Y, T22.Z, literal.y,
+; EG-NEXT:     LSHR T22.W, T22.Y, literal.y,
+; EG-NEXT:     AND_INT * T28.X, T22.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T22.Z, T22.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 80(1.121039e-43)
+; EG-NEXT:     LSHR T30.X, PV.W, literal.x,
+; EG-NEXT:     LSHR T22.Y, T22.X, literal.y,
+; EG-NEXT:     LSHR T31.W, T21.W, literal.y,
+; EG-NEXT:     AND_INT * T22.X, T22.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T31.Z, T21.W, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 64(8.968310e-44)
 ; EG-NEXT:     LSHR T32.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T21.Z, T20.W, literal.y,
-; EG-NEXT:     BFE_UINT T33.W, T20.Y, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T21.X, T20.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T33.Y, T20.X, literal.x, T0.W,
+; EG-NEXT:     LSHR T31.Y, T21.Z, literal.y,
+; EG-NEXT:     LSHR T21.W, T21.Y, literal.y,
+; EG-NEXT:     AND_INT * T31.X, T21.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T21.Z, T21.Y, literal.x,
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 112(1.569454e-43)
-; EG-NEXT:     LSHR T34.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T33.Z, T20.Y, literal.y,
-; EG-NEXT:     AND_INT * T33.X, T20.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT:    65535(9.183409e-41), 112(1.569454e-43)
+; EG-NEXT:     LSHR T33.X, PV.W, literal.x,
+; EG-NEXT:     LSHR T21.Y, T21.X, literal.y,
+; EG-NEXT:     AND_INT * T21.X, T21.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
 ; EG-NEXT:    96(1.345247e-43), 0(0.000000e+00)
-; EG-NEXT:     LSHR * T20.X, PV.W, literal.x,
+; EG-NEXT:     LSHR * T34.X, PV.W, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 ;
 ; CM-LABEL: global_zextload_v32i16_to_v32i32:
 ; CM:       ; %bb.0:
 ; CM-NEXT:    ALU 0, @20, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    TEX 3 @12
-; CM-NEXT:    ALU 78, @21, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 65, @21, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T33, T34.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T31, T20.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T31, T21.X
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T30, T32.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T28, T21.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T28, T22.X
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T27, T29.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T25, T22.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T25, T19.X
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T24, T26.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T23, T19.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T23, T20.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    Fetch clause starting at 12:
-; CM-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 0, #1
-; CM-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 16, #1
-; CM-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 32, #1
-; CM-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 48, #1
+; CM-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 48, #1
+; CM-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 0, #1
+; CM-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 16, #1
+; CM-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 32, #1
 ; CM-NEXT:    ALU clause starting at 20:
 ; CM-NEXT:     MOV * T19.X, KC0[2].Z,
 ; CM-NEXT:    ALU clause starting at 21:
-; CM-NEXT:     MOV * T0.W, literal.x,
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_UINT * T23.W, T19.Y, literal.x, PV.W,
+; CM-NEXT:     LSHR * T23.W, T20.Y, literal.x,
 ; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_UINT T23.Y, T19.X, literal.x, T0.W,
-; CM-NEXT:     AND_INT T23.Z, T19.Y, literal.y,
-; CM-NEXT:     BFE_UINT * T24.W, T19.W, literal.x, T0.W,
-; CM-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
-; CM-NEXT:     AND_INT T23.X, T19.X, literal.x,
-; CM-NEXT:     BFE_UINT T24.Y, T19.Z, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    96(1.345247e-43), 0(0.000000e+00)
-; CM-NEXT:     LSHR T19.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T24.Z, T19.W, literal.y,
-; CM-NEXT:     BFE_UINT * T25.W, T22.Y, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; CM-NEXT:     AND_INT * T23.Z, T20.Y, literal.x,
+; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; CM-NEXT:     LSHR T23.Y, T20.X, literal.x,
+; CM-NEXT:     LSHR * T24.W, T20.W, literal.x,
 ; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     AND_INT T24.X, T19.Z, literal.x,
-; CM-NEXT:     BFE_UINT T25.Y, T22.X, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    112(1.569454e-43), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T23.X, T20.X, literal.x,
+; CM-NEXT:     AND_INT T24.Z, T20.W, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 96(1.345247e-43)
+; CM-NEXT:     LSHR T20.X, PV.W, literal.x,
+; CM-NEXT:     LSHR T24.Y, T20.Z, literal.y,
+; CM-NEXT:     LSHR * T25.W, T19.Y, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; CM-NEXT:     AND_INT T24.X, T20.Z, literal.x,
+; CM-NEXT:     AND_INT T25.Z, T19.Y, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 112(1.569454e-43)
 ; CM-NEXT:     LSHR T26.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T25.Z, T22.Y, literal.y,
-; CM-NEXT:     BFE_UINT * T27.W, T22.W, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     AND_INT T25.X, T22.X, literal.x,
-; CM-NEXT:     BFE_UINT T27.Y, T22.Z, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    64(8.968310e-44), 0(0.000000e+00)
-; CM-NEXT:     LSHR T22.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T27.Z, T22.W, literal.y,
-; CM-NEXT:     BFE_UINT * T28.W, T21.Y, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     AND_INT T27.X, T22.Z, literal.x,
-; CM-NEXT:     BFE_UINT T28.Y, T21.X, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    80(1.121039e-43), 0(0.000000e+00)
+; CM-NEXT:     LSHR T25.Y, T19.X, literal.y,
+; CM-NEXT:     LSHR * T27.W, T19.W, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; CM-NEXT:     AND_INT T25.X, T19.X, literal.x,
+; CM-NEXT:     AND_INT T27.Z, T19.W, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 64(8.968310e-44)
+; CM-NEXT:     LSHR T19.X, PV.W, literal.x,
+; CM-NEXT:     LSHR T27.Y, T19.Z, literal.y,
+; CM-NEXT:     LSHR * T28.W, T22.Y, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; CM-NEXT:     AND_INT T27.X, T19.Z, literal.x,
+; CM-NEXT:     AND_INT T28.Z, T22.Y, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 80(1.121039e-43)
 ; CM-NEXT:     LSHR T29.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T28.Z, T21.Y, literal.y,
-; CM-NEXT:     BFE_UINT * T30.W, T21.W, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     AND_INT T28.X, T21.X, literal.x,
-; CM-NEXT:     BFE_UINT T30.Y, T21.Z, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    32(4.484155e-44), 0(0.000000e+00)
-; CM-NEXT:     LSHR T21.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T30.Z, T21.W, literal.y,
-; CM-NEXT:     BFE_UINT * T31.W, T20.Y, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     AND_INT T30.X, T21.Z, literal.x,
-; CM-NEXT:     BFE_UINT T31.Y, T20.X, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    48(6.726233e-44), 0(0.000000e+00)
+; CM-NEXT:     LSHR T28.Y, T22.X, literal.y,
+; CM-NEXT:     LSHR * T30.W, T22.W, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; CM-NEXT:     AND_INT T28.X, T22.X, literal.x,
+; CM-NEXT:     AND_INT T30.Z, T22.W, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
+; CM-NEXT:     LSHR T22.X, PV.W, literal.x,
+; CM-NEXT:     LSHR T30.Y, T22.Z, literal.y,
+; CM-NEXT:     LSHR * T31.W, T21.Y, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; CM-NEXT:     AND_INT T30.X, T22.Z, literal.x,
+; CM-NEXT:     AND_INT T31.Z, T21.Y, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
 ; CM-NEXT:     LSHR T32.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T31.Z, T20.Y, literal.y,
-; CM-NEXT:     BFE_UINT * T33.W, T20.W, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     AND_INT T31.X, T20.X, literal.x,
-; CM-NEXT:     BFE_UINT * T33.Y, T20.Z, literal.y, T0.W,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:     LSHR T20.X, KC0[2].Y, literal.x,
-; CM-NEXT:     AND_INT * T33.Z, T20.W, literal.y,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:     AND_INT T33.X, T20.Z, literal.x,
+; CM-NEXT:     LSHR T31.Y, T21.X, literal.y,
+; CM-NEXT:     LSHR * T33.W, T21.W, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; CM-NEXT:     AND_INT T31.X, T21.X, literal.x,
+; CM-NEXT:     AND_INT * T33.Z, T21.W, literal.x,
+; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; CM-NEXT:     LSHR T21.X, KC0[2].Y, literal.x,
+; CM-NEXT:     LSHR * T33.Y, T21.Z, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; CM-NEXT:     AND_INT T33.X, T21.Z, literal.x,
 ; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
 ; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
 ; CM-NEXT:     LSHR * T34.X, PV.W, literal.x,
@@ -3939,28 +3911,28 @@ define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(<64 x i32> addrspace
 ; EG-NEXT:    ALU 56, @39, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    TEX 3 @30
 ; EG-NEXT:    ALU 87, @96, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T49.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T66.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T64.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T51.XYZW, T63.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T61.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T52.XYZW, T60.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T56.XYZW, T58.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T57.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T53.XYZW, T55.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T54.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T37.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T66.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T65.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T64.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T62.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T51.XYZW, T61.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T59.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T52.XYZW, T58.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T56.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T55.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T53.X, 0
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T48.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T46.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T45.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T43.X, 0
-; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T42.X, 1
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T47.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T46.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T44.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T43.X, 0
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T41.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    Fetch clause starting at 22:
-; EG-NEXT:     VTX_READ_128 T36.XYZW, T35.X, 48, #1
-; EG-NEXT:     VTX_READ_128 T37.XYZW, T35.X, 32, #1
-; EG-NEXT:     VTX_READ_128 T38.XYZW, T35.X, 16, #1
-; EG-NEXT:     VTX_READ_128 T39.XYZW, T35.X, 0, #1
+; EG-NEXT:     VTX_READ_128 T36.XYZW, T35.X, 0, #1
+; EG-NEXT:     VTX_READ_128 T37.XYZW, T35.X, 48, #1
+; EG-NEXT:     VTX_READ_128 T38.XYZW, T35.X, 32, #1
+; EG-NEXT:     VTX_READ_128 T39.XYZW, T35.X, 16, #1
 ; EG-NEXT:    Fetch clause starting at 30:
 ; EG-NEXT:     VTX_READ_128 T49.XYZW, T35.X, 112, #1
 ; EG-NEXT:     VTX_READ_128 T50.XYZW, T35.X, 96, #1
@@ -3969,160 +3941,160 @@ define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(<64 x i32> addrspace
 ; EG-NEXT:    ALU clause starting at 38:
 ; EG-NEXT:     MOV * T35.X, KC0[2].Z,
 ; EG-NEXT:    ALU clause starting at 39:
-; EG-NEXT:     MOV * T0.W, literal.x,
+; EG-NEXT:     LSHR * T40.W, T36.W, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T40.W, T39.W, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T40.Z, T36.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     LSHR T40.Y, T36.Z, literal.x,
+; EG-NEXT:     LSHR * T36.W, T36.Y, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T40.Y, T39.Z, literal.x, T0.W,
-; EG-NEXT:     AND_INT T40.Z, T39.W, literal.y,
-; EG-NEXT:     BFE_UINT * T41.W, T39.Y, literal.x, T0.W,
-; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
-; EG-NEXT:     AND_INT T40.X, T39.Z, literal.x,
-; EG-NEXT:     BFE_UINT T41.Y, T39.X, literal.y, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT:     AND_INT T40.X, T36.Z, literal.x,
+; EG-NEXT:     AND_INT T36.Z, T36.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
 ; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT:     LSHR T42.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T41.Z, T39.Y, literal.y,
-; EG-NEXT:     BFE_UINT T39.W, T38.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T41.X, T39.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT * T39.Y, T38.Z, literal.x, T0.W,
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHR T41.X, PV.W, literal.x,
+; EG-NEXT:     LSHR T36.Y, T36.X, literal.y,
+; EG-NEXT:     LSHR T42.W, T39.W, literal.y,
+; EG-NEXT:     AND_INT * T36.X, T36.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT * T42.Z, T39.W, literal.x,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
 ; EG-NEXT:     LSHR T43.X, KC0[2].Y, literal.x,
-; EG-NEXT:     AND_INT T39.Z, T38.W, literal.y,
-; EG-NEXT:     BFE_UINT T44.W, T38.Y, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T39.X, T38.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T44.Y, T38.X, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 48(6.726233e-44)
-; EG-NEXT:     LSHR T45.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T44.Z, T38.Y, literal.y,
-; EG-NEXT:     BFE_UINT T38.W, T37.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T44.X, T38.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T38.Y, T37.Z, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 32(4.484155e-44)
+; EG-NEXT:     LSHR T42.Y, T39.Z, literal.y,
+; EG-NEXT:     LSHR T39.W, T39.Y, literal.y,
+; EG-NEXT:     AND_INT * T42.X, T39.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T39.Z, T39.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
+; EG-NEXT:     LSHR T44.X, PV.W, literal.x,
+; EG-NEXT:     LSHR T39.Y, T39.X, literal.y,
+; EG-NEXT:     LSHR T45.W, T38.W, literal.y,
+; EG-NEXT:     AND_INT * T39.X, T39.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T45.Z, T38.W, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
 ; EG-NEXT:     LSHR T46.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T38.Z, T37.W, literal.y,
-; EG-NEXT:     BFE_UINT T47.W, T37.Y, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T38.X, T37.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T47.Y, T37.X, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 80(1.121039e-43)
+; EG-NEXT:     LSHR T45.Y, T38.Z, literal.y,
+; EG-NEXT:     LSHR T38.W, T38.Y, literal.y,
+; EG-NEXT:     AND_INT * T45.X, T38.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T38.Z, T38.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 80(1.121039e-43)
+; EG-NEXT:     LSHR T47.X, PV.W, literal.x,
+; EG-NEXT:     LSHR T38.Y, T38.X, literal.y,
+; EG-NEXT:     AND_INT * T38.X, T38.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.x,
+; EG-NEXT:     LSHR * T35.W, T37.W, literal.y,
+; EG-NEXT:    64(8.968310e-44), 16(2.242078e-44)
 ; EG-NEXT:     LSHR T48.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T47.Z, T37.Y, literal.y,
-; EG-NEXT:     AND_INT * T47.X, T37.X, literal.y,
+; EG-NEXT:     AND_INT * T35.Z, T37.W, literal.y,
 ; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:     BFE_UINT T35.W, T36.W, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 64(8.968310e-44)
-; EG-NEXT:     LSHR T37.X, PS, literal.x,
-; EG-NEXT:     BFE_UINT * T35.Y, T36.Z, literal.y, T0.W,
-; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
 ; EG-NEXT:    ALU clause starting at 96:
-; EG-NEXT:     AND_INT T35.Z, T36.W, literal.x,
-; EG-NEXT:     BFE_UINT * T53.W, T36.Y, literal.y, T0.W,
-; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT:     AND_INT T35.X, T36.Z, literal.x,
-; EG-NEXT:     BFE_UINT T53.Y, T36.X, literal.y, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
-; EG-NEXT:     LSHR T54.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T53.Z, T36.Y, literal.y,
-; EG-NEXT:     BFE_UINT T36.W, T52.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T53.X, T36.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT:     LSHR T35.Y, T37.Z, literal.x,
+; EG-NEXT:     LSHR * T37.W, T37.Y, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T36.Y, T52.Z, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 96(1.345247e-43)
+; EG-NEXT:     AND_INT T35.X, T37.Z, literal.x,
+; EG-NEXT:     AND_INT T37.Z, T37.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 112(1.569454e-43)
+; EG-NEXT:     LSHR T53.X, PV.W, literal.x,
+; EG-NEXT:     LSHR T37.Y, T37.X, literal.y,
+; EG-NEXT:     LSHR T54.W, T52.W, literal.y,
+; EG-NEXT:     AND_INT * T37.X, T37.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T54.Z, T52.W, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 96(1.345247e-43)
 ; EG-NEXT:     LSHR T55.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T36.Z, T52.W, literal.y,
-; EG-NEXT:     BFE_UINT T56.W, T52.Y, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T36.X, T52.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T56.Y, T52.X, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 144(2.017870e-43)
-; EG-NEXT:     LSHR T57.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T56.Z, T52.Y, literal.y,
-; EG-NEXT:     BFE_UINT T52.W, T51.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T56.X, T52.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T52.Y, T51.Z, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 128(1.793662e-43)
+; EG-NEXT:     LSHR T54.Y, T52.Z, literal.y,
+; EG-NEXT:     LSHR T52.W, T52.Y, literal.y,
+; EG-NEXT:     AND_INT * T54.X, T52.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T52.Z, T52.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 144(2.017870e-43)
+; EG-NEXT:     LSHR T56.X, PV.W, literal.x,
+; EG-NEXT:     LSHR T52.Y, T52.X, literal.y,
+; EG-NEXT:     LSHR T57.W, T51.W, literal.y,
+; EG-NEXT:     AND_INT * T52.X, T52.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T57.Z, T51.W, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 128(1.793662e-43)
 ; EG-NEXT:     LSHR T58.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T52.Z, T51.W, literal.y,
-; EG-NEXT:     BFE_UINT T59.W, T51.Y, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T52.X, T51.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T59.Y, T51.X, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 176(2.466285e-43)
-; EG-NEXT:     LSHR T60.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T59.Z, T51.Y, literal.y,
-; EG-NEXT:     BFE_UINT T51.W, T50.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T59.X, T51.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T51.Y, T50.Z, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 160(2.242078e-43)
+; EG-NEXT:     LSHR T57.Y, T51.Z, literal.y,
+; EG-NEXT:     LSHR T51.W, T51.Y, literal.y,
+; EG-NEXT:     AND_INT * T57.X, T51.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T51.Z, T51.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 176(2.466285e-43)
+; EG-NEXT:     LSHR T59.X, PV.W, literal.x,
+; EG-NEXT:     LSHR T51.Y, T51.X, literal.y,
+; EG-NEXT:     LSHR T60.W, T50.W, literal.y,
+; EG-NEXT:     AND_INT * T51.X, T51.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T60.Z, T50.W, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 160(2.242078e-43)
 ; EG-NEXT:     LSHR T61.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T51.Z, T50.W, literal.y,
-; EG-NEXT:     BFE_UINT T62.W, T50.Y, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T51.X, T50.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T62.Y, T50.X, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 208(2.914701e-43)
-; EG-NEXT:     LSHR T63.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T62.Z, T50.Y, literal.y,
-; EG-NEXT:     BFE_UINT T50.W, T49.W, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T62.X, T50.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T50.Y, T49.Z, literal.x, T0.W,
-; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 192(2.690493e-43)
+; EG-NEXT:     LSHR T60.Y, T50.Z, literal.y,
+; EG-NEXT:     LSHR T50.W, T50.Y, literal.y,
+; EG-NEXT:     AND_INT * T60.X, T50.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T50.Z, T50.Y, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 208(2.914701e-43)
+; EG-NEXT:     LSHR T62.X, PV.W, literal.x,
+; EG-NEXT:     LSHR T50.Y, T50.X, literal.y,
+; EG-NEXT:     LSHR T63.W, T49.W, literal.y,
+; EG-NEXT:     AND_INT * T50.X, T50.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T63.Z, T49.W, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 192(2.690493e-43)
 ; EG-NEXT:     LSHR T64.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T50.Z, T49.W, literal.y,
-; EG-NEXT:     BFE_UINT T65.W, T49.Y, literal.z, T0.W,
-; EG-NEXT:     AND_INT * T50.X, T49.Z, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT:     BFE_UINT T65.Y, T49.X, literal.x, T0.W,
+; EG-NEXT:     LSHR T63.Y, T49.Z, literal.y,
+; EG-NEXT:     LSHR T49.W, T49.Y, literal.y,
+; EG-NEXT:     AND_INT * T63.X, T49.Z, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T49.Z, T49.Y, literal.x,
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT:    16(2.242078e-44), 240(3.363116e-43)
-; EG-NEXT:     LSHR T66.X, PV.W, literal.x,
-; EG-NEXT:     AND_INT T65.Z, T49.Y, literal.y,
-; EG-NEXT:     AND_INT * T65.X, T49.X, literal.y,
-; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT:    65535(9.183409e-41), 240(3.363116e-43)
+; EG-NEXT:     LSHR T65.X, PV.W, literal.x,
+; EG-NEXT:     LSHR T49.Y, T49.X, literal.y,
+; EG-NEXT:     AND_INT * T49.X, T49.X, literal.z,
+; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
 ; EG-NEXT:    224(3.138909e-43), 0(0.000000e+00)
-; EG-NEXT:     LSHR * T49.X, PV.W, literal.x,
+; EG-NEXT:     LSHR * T66.X, PV.W, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 ;
 ; CM-LABEL: global_zextload_v64i16_to_v64i32:
 ; CM:       ; %bb.0:
 ; CM-NEXT:    ALU 0, @38, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    TEX 3 @22
-; CM-NEXT:    ALU 62, @39, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 50, @39, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    TEX 3 @30
-; CM-NEXT:    ALU 95, @102, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 78, @90, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T65, T66.X
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T63, T48.X
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T62, T64.X
@@ -4132,19 +4104,19 @@ define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(<64 x i32> addrspace
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T56, T58.X
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T54, T51.X
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T53, T55.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T35, T36.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T35, T37.X
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T47, T52.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T45, T37.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T45, T38.X
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T44, T46.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T42, T38.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T42, T39.X
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T41, T43.X
-; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T40, T39.X
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T40, T36.X
 ; CM-NEXT:    CF_END
 ; CM-NEXT:    Fetch clause starting at 22:
-; CM-NEXT:     VTX_READ_128 T36.XYZW, T35.X, 64, #1
-; CM-NEXT:     VTX_READ_128 T37.XYZW, T35.X, 80, #1
-; CM-NEXT:     VTX_READ_128 T38.XYZW, T35.X, 96, #1
-; CM-NEXT:     VTX_READ_128 T39.XYZW, T35.X, 112, #1
+; CM-NEXT:     VTX_READ_128 T36.XYZW, T35.X, 112, #1
+; CM-NEXT:     VTX_READ_128 T37.XYZW, T35.X, 64, #1
+; CM-NEXT:     VTX_READ_128 T38.XYZW, T35.X, 80, #1
+; CM-NEXT:     VTX_READ_128 T39.XYZW, T35.X, 96, #1
 ; CM-NEXT:    Fetch clause starting at 30:
 ; CM-NEXT:     VTX_READ_128 T48.XYZW, T35.X, 0, #1
 ; CM-NEXT:     VTX_READ_128 T49.XYZW, T35.X, 16, #1
@@ -4153,161 +4125,132 @@ define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(<64 x i32> addrspace
 ; CM-NEXT:    ALU clause starting at 38:
 ; CM-NEXT:     MOV * T35.X, KC0[2].Z,
 ; CM-NEXT:    ALU clause starting at 39:
-; CM-NEXT:     MOV * T0.W, literal.x,
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_UINT * T40.W, T39.Y, literal.x, PV.W,
+; CM-NEXT:     LSHR * T40.W, T36.Y, literal.x,
 ; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     BFE_UINT T40.Y, T39.X, literal.x, T0.W,
-; CM-NEXT:     AND_INT T40.Z, T39.Y, literal.y,
-; CM-NEXT:     BFE_UINT * T41.W, T39.W, literal.x, T0.W,
-; CM-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
-; CM-NEXT:     AND_INT T40.X, T39.X, literal.x,
-; CM-NEXT:     BFE_UINT T41.Y, T39.Z, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    224(3.138909e-43), 0(0.000000e+00)
-; CM-NEXT:     LSHR T39.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T41.Z, T39.W, literal.y,
-; CM-NEXT:     BFE_UINT * T42.W, T38.Y, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; CM-NEXT:     AND_INT * T40.Z, T36.Y, literal.x,
+; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; CM-NEXT:     LSHR T40.Y, T36.X, literal.x,
+; CM-NEXT:     LSHR * T41.W, T36.W, literal.x,
 ; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     AND_INT T41.X, T39.Z, literal.x,
-; CM-NEXT:     BFE_UINT T42.Y, T38.X, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    240(3.363116e-43), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T40.X, T36.X, literal.x,
+; CM-NEXT:     AND_INT T41.Z, T36.W, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 224(3.138909e-43)
+; CM-NEXT:     LSHR T36.X, PV.W, literal.x,
+; CM-NEXT:     LSHR T41.Y, T36.Z, literal.y,
+; CM-NEXT:     LSHR * T42.W, T39.Y, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; CM-NEXT:     AND_INT T41.X, T36.Z, literal.x,
+; CM-NEXT:     AND_INT T42.Z, T39.Y, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 240(3.363116e-43)
 ; CM-NEXT:     LSHR T43.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T42.Z, T38.Y, literal.y,
-; CM-NEXT:     BFE_UINT * T44.W, T38.W, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     AND_INT T42.X, T38.X, literal.x,
-; CM-NEXT:     BFE_UINT T44.Y, T38.Z, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    192(2.690493e-43), 0(0.000000e+00)
-; CM-NEXT:     LSHR T38.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T44.Z, T38.W, literal.y,
-; CM-NEXT:     BFE_UINT * T45.W, T37.Y, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     AND_INT T44.X, T38.Z, literal.x,
-; CM-NEXT:     BFE_UINT T45.Y, T37.X, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    208(2.914701e-43), 0(0.000000e+00)
+; CM-NEXT:     LSHR T42.Y, T39.X, literal.y,
+; CM-NEXT:     LSHR * T44.W, T39.W, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; CM-NEXT:     AND_INT T42.X, T39.X, literal.x,
+; CM-NEXT:     AND_INT T44.Z, T39.W, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 192(2.690493e-43)
+; CM-NEXT:     LSHR T39.X, PV.W, literal.x,
+; CM-NEXT:     LSHR T44.Y, T39.Z, literal.y,
+; CM-NEXT:     LSHR * T45.W, T38.Y, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; CM-NEXT:     AND_INT T44.X, T39.Z, literal.x,
+; CM-NEXT:     AND_INT T45.Z, T38.Y, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 208(2.914701e-43)
 ; CM-NEXT:     LSHR T46.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T45.Z, T37.Y, literal.y,
-; CM-NEXT:     BFE_UINT * T47.W, T37.W, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     AND_INT T45.X, T37.X, literal.x,
-; CM-NEXT:     BFE_UINT T47.Y, T37.Z, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    160(2.242078e-43), 0(0.000000e+00)
+; CM-NEXT:     LSHR T45.Y, T38.X, literal.y,
+; CM-NEXT:     LSHR * T47.W, T38.W, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; CM-NEXT:     AND_INT T45.X, T38.X, literal.x,
+; CM-NEXT:     AND_INT T47.Z, T38.W, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 160(2.242078e-43)
+; CM-NEXT:     LSHR T38.X, PV.W, literal.x,
+; CM-NEXT:     LSHR T47.Y, T38.Z, literal.y,
+; CM-NEXT:     LSHR * T35.W, T37.Y, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; CM-NEXT:     AND_INT T47.X, T38.Z, literal.x,
+; CM-NEXT:     AND_INT T35.Z, T37.Y, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 176(2.466285e-43)
+; CM-NEXT:    ALU clause starting at 90:
+; CM-NEXT:     LSHR T52.X, T0.W, literal.x,
+; CM-NEXT:     LSHR T35.Y, T37.X, literal.y,
+; CM-NEXT:     LSHR * T53.W, T37.W, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; CM-NEXT:     AND_INT T35.X, T37.X, literal.x,
+; CM-NEXT:     AND_INT T53.Z, T37.W, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 128(1.793662e-43)
 ; CM-NEXT:     LSHR T37.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T47.Z, T37.W, literal.y,
-; CM-NEXT:     BFE_UINT * T35.W, T36.Y, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     AND_INT T47.X, T37.Z, literal.x,
-; CM-NEXT:     BFE_UINT T35.Y, T36.X, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    176(2.466285e-43), 0(0.000000e+00)
-; CM-NEXT:    ALU clause starting at 102:
-; CM-NEXT:     LSHR T52.X, T1.W, literal.x,
-; CM-NEXT:     AND_INT T35.Z, T36.Y, literal.y,
-; CM-NEXT:     BFE_UINT * T53.W, T36.W, literal.z, T0.W, BS:VEC_102/SCL_221
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     AND_INT T35.X, T36.X, literal.x,
-; CM-NEXT:     BFE_UINT T53.Y, T36.Z, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    128(1.793662e-43), 0(0.000000e+00)
-; CM-NEXT:     LSHR T36.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T53.Z, T36.W, literal.y,
-; CM-NEXT:     BFE_UINT * T54.W, T51.Y, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT:     AND_INT T53.X, T36.Z, literal.x,
-; CM-NEXT:     BFE_UINT T54.Y, T51.X, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    144(2.017870e-43), 0(0.000000e+00)
+; CM-NEXT:     LSHR T53.Y, T37.Z, literal.y,
+; CM-NEXT:     LSHR * T54.W, T51.Y, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
+; CM-NEXT:     AND_INT T53.X, T37.Z, literal.x,
+; CM-NEXT:     AND_INT T54.Z, T51.Y, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 144(2.017870e-43)
 ; CM-NEXT:     LSHR T55.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T54.Z, T51.Y, literal.y,
-; CM-NEXT:     BFE_UINT * T56.W, T51.W, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     LSHR T54.Y, T51.X, literal.y,
+; CM-NEXT:     LSHR * T56.W, T51.W, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
 ; CM-NEXT:     AND_INT T54.X, T51.X, literal.x,
-; CM-NEXT:     BFE_UINT T56.Y, T51.Z, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    96(1.345247e-43), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T56.Z, T51.W, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 96(1.345247e-43)
 ; CM-NEXT:     LSHR T51.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T56.Z, T51.W, literal.y,
-; CM-NEXT:     BFE_UINT * T57.W, T50.Y, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     LSHR T56.Y, T51.Z, literal.y,
+; CM-NEXT:     LSHR * T57.W, T50.Y, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
 ; CM-NEXT:     AND_INT T56.X, T51.Z, literal.x,
-; CM-NEXT:     BFE_UINT T57.Y, T50.X, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    112(1.569454e-43), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T57.Z, T50.Y, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 112(1.569454e-43)
 ; CM-NEXT:     LSHR T58.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T57.Z, T50.Y, literal.y,
-; CM-NEXT:     BFE_UINT * T59.W, T50.W, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     LSHR T57.Y, T50.X, literal.y,
+; CM-NEXT:     LSHR * T59.W, T50.W, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
 ; CM-NEXT:     AND_INT T57.X, T50.X, literal.x,
-; CM-NEXT:     BFE_UINT T59.Y, T50.Z, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    64(8.968310e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T59.Z, T50.W, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 64(8.968310e-44)
 ; CM-NEXT:     LSHR T50.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T59.Z, T50.W, literal.y,
-; CM-NEXT:     BFE_UINT * T60.W, T49.Y, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     LSHR T59.Y, T50.Z, literal.y,
+; CM-NEXT:     LSHR * T60.W, T49.Y, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
 ; CM-NEXT:     AND_INT T59.X, T50.Z, literal.x,
-; CM-NEXT:     BFE_UINT T60.Y, T49.X, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    80(1.121039e-43), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T60.Z, T49.Y, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 80(1.121039e-43)
 ; CM-NEXT:     LSHR T61.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T60.Z, T49.Y, literal.y,
-; CM-NEXT:     BFE_UINT * T62.W, T49.W, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     LSHR T60.Y, T49.X, literal.y,
+; CM-NEXT:     LSHR * T62.W, T49.W, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
 ; CM-NEXT:     AND_INT T60.X, T49.X, literal.x,
-; CM-NEXT:     BFE_UINT T62.Y, T49.Z, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T62.Z, T49.W, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
 ; CM-NEXT:     LSHR T49.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T62.Z, T49.W, literal.y,
-; CM-NEXT:     BFE_UINT * T63.W, T48.Y, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     LSHR T62.Y, T49.Z, literal.y,
+; CM-NEXT:     LSHR * T63.W, T48.Y, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
 ; CM-NEXT:     AND_INT T62.X, T49.Z, literal.x,
-; CM-NEXT:     BFE_UINT T63.Y, T48.X, literal.y, T0.W,
-; CM-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
-; CM-NEXT:    48(6.726233e-44), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T63.Z, T48.Y, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
 ; CM-NEXT:     LSHR T64.X, PV.W, literal.x,
-; CM-NEXT:     AND_INT T63.Z, T48.Y, literal.y,
-; CM-NEXT:     BFE_UINT * T65.W, T48.W, literal.z, T0.W,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
-; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     LSHR T63.Y, T48.X, literal.y,
+; CM-NEXT:     LSHR * T65.W, T48.W, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
 ; CM-NEXT:     AND_INT T63.X, T48.X, literal.x,
-; CM-NEXT:     BFE_UINT * T65.Y, T48.Z, literal.y, T0.W,
-; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT:     AND_INT * T65.Z, T48.W, literal.x,
+; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
 ; CM-NEXT:     LSHR T48.X, KC0[2].Y, literal.x,
-; CM-NEXT:     AND_INT * T65.Z, T48.W, literal.y,
-; CM-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
+; CM-NEXT:     LSHR * T65.Y, T48.Z, literal.y,
+; CM-NEXT:    2(2.802597e-45), 16(2.242078e-44)
 ; CM-NEXT:     AND_INT T65.X, T48.Z, literal.x,
 ; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
 ; CM-NEXT:    65535(9.183409e-41), 16(2.242078e-44)


        


More information about the llvm-commits mailing list