[llvm] 80b627d - AMDGPU/GlobalISel: Fix handling of G_ANYEXT with s1 source

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 16 10:02:34 PDT 2020


Author: Matt Arsenault
Date: 2020-03-16T12:59:54-04:00
New Revision: 80b627d69d3457e9b5deac2ba10808f00c96edf6

URL: https://github.com/llvm/llvm-project/commit/80b627d69d3457e9b5deac2ba10808f00c96edf6
DIFF: https://github.com/llvm/llvm-project/commit/80b627d69d3457e9b5deac2ba10808f00c96edf6.diff

LOG: AMDGPU/GlobalISel: Fix handling of G_ANYEXT with s1 source

We were letting G_ANYEXT with a vcc register bank through, which was
incorrect and would select to an invalid copy. Fix this up the same way
as G_ZEXT and G_SEXT. Also drop the old code that fixed up the
non-boolean case in RegBankSelect. We now have to perform that
expansion during selection, so there's no benefit to doing it during
RegBankSelect.
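
As a minimal before/after sketch of the s1-to-s32 case (paraphrasing
the updated regbankselect-anyext.mir checks below, not an exhaustive
description of the lowering), the vcc-sourced G_ANYEXT now becomes the
select we would otherwise have had to produce when selecting the copy:

    ; Before RegBankSelect:
    %2:_(s1) = G_ICMP intpred(eq), %0(s32), %1
    %3:_(s32) = G_ANYEXT %2(s1)

    ; After RegBankSelect, with %2 assigned to the vcc bank:
    %2:vcc(s1) = G_ICMP intpred(eq), %0(s32), %1
    %4:vgpr(s32) = G_CONSTANT i32 1
    %5:vgpr(s32) = G_CONSTANT i32 0
    %3:vgpr(s32) = G_SELECT %2(s1), %4, %5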

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index da0f6b08264c..e72dc8e20220 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -220,7 +220,7 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
                                           unsigned Size) const {
   // TODO: Should there be a UniformVGPRRegBank which can use readfirstlane?
   if (Dst.getID() == AMDGPU::SGPRRegBankID &&
-      isVectorRegisterBank(Src)) {
+      (isVectorRegisterBank(Src) || Src.getID() == AMDGPU::VCCRegBankID)) {
     return std::numeric_limits<unsigned>::max();
   }
 
@@ -238,9 +238,6 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
        Src.getID() == AMDGPU::VCCRegBankID))
     return std::numeric_limits<unsigned>::max();
 
-  if (Src.getID() == AMDGPU::VCCRegBankID)
-    return std::numeric_limits<unsigned>::max();
-
   // There is no direct copy between AGPRs.
   if (Dst.getID() == AMDGPU::AGPRRegBankID &&
       Src.getID() == AMDGPU::AGPRRegBankID)
@@ -2252,10 +2249,13 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
     return;
   }
   case AMDGPU::G_SEXT:
-  case AMDGPU::G_ZEXT: {
+  case AMDGPU::G_ZEXT:
+  case AMDGPU::G_ANYEXT: {
     Register SrcReg = MI.getOperand(1).getReg();
     LLT SrcTy = MRI.getType(SrcReg);
-    bool Signed = Opc == AMDGPU::G_SEXT;
+    const bool Signed = Opc == AMDGPU::G_SEXT;
+
+    assert(empty(OpdMapper.getVRegs(1)));
 
     MachineIRBuilder B(MI);
     const RegisterBank *SrcBank =
@@ -2282,9 +2282,12 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
         auto ShiftAmt = B.buildConstant(S32, 31);
         MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
         B.buildAShr(DefRegs[1], DefRegs[0], ShiftAmt);
-      } else {
+      } else if (Opc == AMDGPU::G_ZEXT) {
         B.buildZExtOrTrunc(DefRegs[0], SrcReg);
         B.buildConstant(DefRegs[1], 0);
+      } else {
+        B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);
+        B.buildUndef(DefRegs[1]);
       }
 
       MRI.setRegBank(DstReg, *SrcBank);
@@ -2295,6 +2298,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
     if (SrcTy != LLT::scalar(1))
       return;
 
+    // It is not legal to have a legalization artifact with a VCC source. Rather
+    // than introducing a copy, insert the select we would have to select the
+    // copy to.
     if (SrcBank == &AMDGPU::VCCRegBank) {
       SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
 
@@ -2329,24 +2335,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
       return;
     }
 
-    // Fixup the case with an s1 src that isn't a condition register. Use shifts
-    // instead of introducing a compare to avoid an unnecessary condition
-    // register (and since there's no scalar 16-bit compares).
-    auto Ext = B.buildAnyExt(DstTy, SrcReg);
-    auto ShiftAmt = B.buildConstant(LLT::scalar(32), DstTy.getSizeInBits() - 1);
-    auto Shl = B.buildShl(DstTy, Ext, ShiftAmt);
-
-    if (MI.getOpcode() == AMDGPU::G_SEXT)
-      B.buildAShr(DstReg, Shl, ShiftAmt);
-    else
-      B.buildLShr(DstReg, Shl, ShiftAmt);
-
-    MRI.setRegBank(DstReg, *SrcBank);
-    MRI.setRegBank(Ext.getReg(0), *SrcBank);
-    MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
-    MRI.setRegBank(Shl.getReg(0), *SrcBank);
-    MI.eraseFromParent();
-    return;
+    break;
   }
   case AMDGPU::G_BUILD_VECTOR:
   case AMDGPU::G_BUILD_VECTOR_TRUNC: {
@@ -3423,17 +3412,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       break;
     }
 
-    // TODO: Should anyext be split into 32-bit part as well?
-    if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
-      OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
-      OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
-    } else {
-      // Scalar extend can use 64-bit BFE, but VGPRs require extending to
-      // 32-bits, and then to 64.
-      OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
-      OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
-                                                         SrcSize);
-    }
+    // Scalar extend can use 64-bit BFE, but VGPRs require extending to
+    // 32-bits, and then to 64.
+    OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
+    OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
+                                                       SrcSize);
     break;
   }
   case AMDGPU::G_FCMP: {

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
index 070bfaf8ff03..f34c481824af 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
@@ -301,11 +301,10 @@ define amdgpu_ps <3 x i32> @s_mul_i96(i96 inreg %num, i96 inreg %den) {
 ; GFX7-NEXT:    s_mul_i32 s6, s0, s3
 ; GFX7-NEXT:    s_mul_i32 s5, s0, s5
 ; GFX7-NEXT:    s_add_i32 s0, s2, s7
-; GFX7-NEXT:    s_lshl_b32 s8, s8, 31
 ; GFX7-NEXT:    s_add_i32 s0, s0, s5
 ; GFX7-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX7-NEXT:    v_add_i32_e32 v2, vcc, s0, v2
-; GFX7-NEXT:    s_lshr_b32 s8, s8, 31
+; GFX7-NEXT:    s_and_b32 s8, s8, 1
 ; GFX7-NEXT:    v_add_i32_e32 v1, vcc, s8, v1
 ; GFX7-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; GFX7-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
@@ -332,11 +331,10 @@ define amdgpu_ps <3 x i32> @s_mul_i96(i96 inreg %num, i96 inreg %den) {
 ; GFX8-NEXT:    s_mul_i32 s6, s0, s3
 ; GFX8-NEXT:    s_mul_i32 s5, s0, s5
 ; GFX8-NEXT:    s_add_i32 s0, s2, s7
-; GFX8-NEXT:    s_lshl_b32 s8, s8, 31
 ; GFX8-NEXT:    s_add_i32 s0, s0, s5
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, s0, v2
-; GFX8-NEXT:    s_lshr_b32 s8, s8, 31
+; GFX8-NEXT:    s_and_b32 s8, s8, 1
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, s8, v1
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v2, v1
@@ -351,13 +349,11 @@ define amdgpu_ps <3 x i32> @s_mul_i96(i96 inreg %num, i96 inreg %den) {
 ; GFX9-NEXT:    s_mul_i32 s8, s0, s4
 ; GFX9-NEXT:    s_add_u32 s7, s7, s8
 ; GFX9-NEXT:    s_cselect_b32 s8, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s8, s8, 31
 ; GFX9-NEXT:    s_mul_hi_u32 s9, s0, s3
-; GFX9-NEXT:    s_lshr_b32 s8, s8, 31
+; GFX9-NEXT:    s_and_b32 s8, s8, 1
 ; GFX9-NEXT:    s_add_u32 s7, s7, s9
 ; GFX9-NEXT:    s_cselect_b32 s9, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s9, s9, 31
-; GFX9-NEXT:    s_lshr_b32 s9, s9, 31
+; GFX9-NEXT:    s_and_b32 s9, s9, 1
 ; GFX9-NEXT:    s_add_i32 s8, s8, s9
 ; GFX9-NEXT:    s_mul_i32 s9, s1, s4
 ; GFX9-NEXT:    s_mul_i32 s2, s2, s3
@@ -467,27 +463,24 @@ define amdgpu_ps <4 x i32> @s_mul_i128(i128 inreg %num, i128 inreg %den) {
 ; GFX7-NEXT:    s_mul_i32 s10, s0, s5
 ; GFX7-NEXT:    s_add_u32 s9, s9, s10
 ; GFX7-NEXT:    s_cselect_b32 s10, 1, 0
-; GFX7-NEXT:    s_lshl_b32 s10, s10, 31
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, s9, v0
-; GFX7-NEXT:    s_lshr_b32 s10, s10, 31
+; GFX7-NEXT:    s_and_b32 s10, s10, 1
 ; GFX7-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX7-NEXT:    v_add_i32_e32 v1, vcc, s10, v1
 ; GFX7-NEXT:    s_mul_i32 s9, s2, s4
 ; GFX7-NEXT:    s_mul_i32 s10, s1, s5
+; GFX7-NEXT:    v_mov_b32_e32 v2, s1
 ; GFX7-NEXT:    s_add_u32 s9, s9, s10
 ; GFX7-NEXT:    s_cselect_b32 s10, 1, 0
-; GFX7-NEXT:    v_mov_b32_e32 v2, s1
-; GFX7-NEXT:    s_lshl_b32 s10, s10, 31
-; GFX7-NEXT:    s_mul_i32 s11, s0, s6
-; GFX7-NEXT:    s_lshr_b32 s10, s10, 31
 ; GFX7-NEXT:    v_mul_hi_u32 v2, v2, s4
+; GFX7-NEXT:    s_mul_i32 s11, s0, s6
+; GFX7-NEXT:    s_and_b32 s10, s10, 1
 ; GFX7-NEXT:    s_add_u32 s9, s9, s11
-; GFX7-NEXT:    s_cselect_b32 s11, 1, 0
 ; GFX7-NEXT:    v_mov_b32_e32 v3, s5
-; GFX7-NEXT:    s_lshl_b32 s11, s11, 31
+; GFX7-NEXT:    s_cselect_b32 s11, 1, 0
 ; GFX7-NEXT:    v_mul_hi_u32 v4, s0, v3
 ; GFX7-NEXT:    v_add_i32_e32 v2, vcc, s9, v2
-; GFX7-NEXT:    s_lshr_b32 s11, s11, 31
+; GFX7-NEXT:    s_and_b32 s11, s11, 1
 ; GFX7-NEXT:    s_add_i32 s10, s10, s11
 ; GFX7-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX7-NEXT:    v_add_i32_e32 v5, vcc, s10, v5
@@ -528,27 +521,24 @@ define amdgpu_ps <4 x i32> @s_mul_i128(i128 inreg %num, i128 inreg %den) {
 ; GFX8-NEXT:    s_mul_i32 s10, s0, s5
 ; GFX8-NEXT:    s_add_u32 s9, s9, s10
 ; GFX8-NEXT:    s_cselect_b32 s10, 1, 0
-; GFX8-NEXT:    s_lshl_b32 s10, s10, 31
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s9, v0
-; GFX8-NEXT:    s_lshr_b32 s10, s10, 31
+; GFX8-NEXT:    s_and_b32 s10, s10, 1
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, s10, v1
 ; GFX8-NEXT:    s_mul_i32 s9, s2, s4
 ; GFX8-NEXT:    s_mul_i32 s10, s1, s5
+; GFX8-NEXT:    v_mov_b32_e32 v2, s1
 ; GFX8-NEXT:    s_add_u32 s9, s9, s10
 ; GFX8-NEXT:    s_cselect_b32 s10, 1, 0
-; GFX8-NEXT:    v_mov_b32_e32 v2, s1
-; GFX8-NEXT:    s_lshl_b32 s10, s10, 31
-; GFX8-NEXT:    s_mul_i32 s11, s0, s6
-; GFX8-NEXT:    s_lshr_b32 s10, s10, 31
 ; GFX8-NEXT:    v_mul_hi_u32 v2, v2, s4
+; GFX8-NEXT:    s_mul_i32 s11, s0, s6
+; GFX8-NEXT:    s_and_b32 s10, s10, 1
 ; GFX8-NEXT:    s_add_u32 s9, s9, s11
-; GFX8-NEXT:    s_cselect_b32 s11, 1, 0
 ; GFX8-NEXT:    v_mov_b32_e32 v3, s5
-; GFX8-NEXT:    s_lshl_b32 s11, s11, 31
+; GFX8-NEXT:    s_cselect_b32 s11, 1, 0
 ; GFX8-NEXT:    v_mul_hi_u32 v4, s0, v3
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, s9, v2
-; GFX8-NEXT:    s_lshr_b32 s11, s11, 31
+; GFX8-NEXT:    s_and_b32 s11, s11, 1
 ; GFX8-NEXT:    s_add_i32 s10, s10, s11
 ; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v5, vcc, s10, v5
@@ -587,42 +577,35 @@ define amdgpu_ps <4 x i32> @s_mul_i128(i128 inreg %num, i128 inreg %den) {
 ; GFX9-NEXT:    s_mul_i32 s10, s0, s5
 ; GFX9-NEXT:    s_add_u32 s9, s9, s10
 ; GFX9-NEXT:    s_cselect_b32 s10, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s10, s10, 31
 ; GFX9-NEXT:    s_mul_hi_u32 s11, s0, s4
-; GFX9-NEXT:    s_lshr_b32 s10, s10, 31
+; GFX9-NEXT:    s_and_b32 s10, s10, 1
 ; GFX9-NEXT:    s_add_u32 s9, s9, s11
 ; GFX9-NEXT:    s_cselect_b32 s11, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s11, s11, 31
-; GFX9-NEXT:    s_lshr_b32 s11, s11, 31
+; GFX9-NEXT:    s_and_b32 s11, s11, 1
 ; GFX9-NEXT:    s_add_i32 s10, s10, s11
 ; GFX9-NEXT:    s_mul_i32 s11, s2, s4
 ; GFX9-NEXT:    s_mul_i32 s12, s1, s5
 ; GFX9-NEXT:    s_add_u32 s11, s11, s12
 ; GFX9-NEXT:    s_cselect_b32 s12, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s12, s12, 31
 ; GFX9-NEXT:    s_mul_i32 s13, s0, s6
-; GFX9-NEXT:    s_lshr_b32 s12, s12, 31
+; GFX9-NEXT:    s_and_b32 s12, s12, 1
 ; GFX9-NEXT:    s_add_u32 s11, s11, s13
 ; GFX9-NEXT:    s_cselect_b32 s13, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s13, s13, 31
-; GFX9-NEXT:    s_lshr_b32 s13, s13, 31
+; GFX9-NEXT:    s_and_b32 s13, s13, 1
 ; GFX9-NEXT:    s_mul_hi_u32 s14, s1, s4
 ; GFX9-NEXT:    s_add_i32 s12, s12, s13
 ; GFX9-NEXT:    s_add_u32 s11, s11, s14
 ; GFX9-NEXT:    s_cselect_b32 s13, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s13, s13, 31
-; GFX9-NEXT:    s_lshr_b32 s13, s13, 31
+; GFX9-NEXT:    s_and_b32 s13, s13, 1
 ; GFX9-NEXT:    s_mul_hi_u32 s15, s0, s5
 ; GFX9-NEXT:    s_add_i32 s12, s12, s13
 ; GFX9-NEXT:    s_add_u32 s11, s11, s15
 ; GFX9-NEXT:    s_cselect_b32 s13, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s13, s13, 31
-; GFX9-NEXT:    s_lshr_b32 s13, s13, 31
+; GFX9-NEXT:    s_and_b32 s13, s13, 1
 ; GFX9-NEXT:    s_add_i32 s12, s12, s13
 ; GFX9-NEXT:    s_add_u32 s10, s11, s10
 ; GFX9-NEXT:    s_cselect_b32 s11, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s11, s11, 31
-; GFX9-NEXT:    s_lshr_b32 s11, s11, 31
+; GFX9-NEXT:    s_and_b32 s11, s11, 1
 ; GFX9-NEXT:    s_add_i32 s12, s12, s11
 ; GFX9-NEXT:    s_mul_i32 s11, s2, s5
 ; GFX9-NEXT:    s_mul_i32 s3, s3, s4
@@ -806,148 +789,134 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) {
 ; GFX7-NEXT:    s_mul_i32 s18, s0, s9
 ; GFX7-NEXT:    s_add_u32 s17, s17, s18
 ; GFX7-NEXT:    s_cselect_b32 s18, 1, 0
-; GFX7-NEXT:    s_lshl_b32 s18, s18, 31
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, s17, v0
-; GFX7-NEXT:    s_lshr_b32 s18, s18, 31
+; GFX7-NEXT:    s_and_b32 s18, s18, 1
 ; GFX7-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX7-NEXT:    v_add_i32_e32 v1, vcc, s18, v1
 ; GFX7-NEXT:    s_mul_i32 s17, s2, s8
 ; GFX7-NEXT:    s_mul_i32 s18, s1, s9
+; GFX7-NEXT:    v_mov_b32_e32 v2, s1
 ; GFX7-NEXT:    s_add_u32 s17, s17, s18
 ; GFX7-NEXT:    s_cselect_b32 s18, 1, 0
-; GFX7-NEXT:    v_mov_b32_e32 v2, s1
-; GFX7-NEXT:    s_lshl_b32 s18, s18, 31
-; GFX7-NEXT:    s_mul_i32 s19, s0, s10
-; GFX7-NEXT:    s_lshr_b32 s18, s18, 31
 ; GFX7-NEXT:    v_mul_hi_u32 v2, v2, s8
+; GFX7-NEXT:    s_mul_i32 s19, s0, s10
+; GFX7-NEXT:    s_and_b32 s18, s18, 1
 ; GFX7-NEXT:    s_add_u32 s17, s17, s19
-; GFX7-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX7-NEXT:    s_lshl_b32 s19, s19, 31
-; GFX7-NEXT:    v_add_i32_e32 v2, vcc, s17, v2
 ; GFX7-NEXT:    v_mov_b32_e32 v3, s9
-; GFX7-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX7-NEXT:    s_cselect_b32 s19, 1, 0
 ; GFX7-NEXT:    v_mul_hi_u32 v4, s0, v3
+; GFX7-NEXT:    v_add_i32_e32 v2, vcc, s17, v2
+; GFX7-NEXT:    s_and_b32 s19, s19, 1
 ; GFX7-NEXT:    s_add_i32 s18, s18, s19
 ; GFX7-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX7-NEXT:    v_add_i32_e32 v5, vcc, s18, v5
+; GFX7-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; GFX7-NEXT:    s_mul_i32 s17, s3, s8
 ; GFX7-NEXT:    s_mul_i32 s18, s2, s9
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; GFX7-NEXT:    s_add_u32 s17, s17, s18
 ; GFX7-NEXT:    s_cselect_b32 s18, 1, 0
-; GFX7-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; GFX7-NEXT:    s_lshl_b32 s18, s18, 31
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX7-NEXT:    s_mul_i32 s19, s1, s10
-; GFX7-NEXT:    s_lshr_b32 s18, s18, 31
-; GFX7-NEXT:    s_add_u32 s17, s17, s19
 ; GFX7-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; GFX7-NEXT:    s_cselect_b32 s19, 1, 0
 ; GFX7-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
+; GFX7-NEXT:    s_mul_i32 s19, s1, s10
+; GFX7-NEXT:    s_and_b32 s18, s18, 1
 ; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX7-NEXT:    s_lshl_b32 s19, s19, 31
+; GFX7-NEXT:    s_add_u32 s17, s17, s19
+; GFX7-NEXT:    s_cselect_b32 s19, 1, 0
 ; GFX7-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
 ; GFX7-NEXT:    v_mov_b32_e32 v4, s2
-; GFX7-NEXT:    s_lshr_b32 s19, s19, 31
 ; GFX7-NEXT:    v_mul_hi_u32 v5, v4, s8
+; GFX7-NEXT:    s_and_b32 s19, s19, 1
 ; GFX7-NEXT:    s_mul_i32 s20, s0, s11
 ; GFX7-NEXT:    s_add_i32 s18, s18, s19
 ; GFX7-NEXT:    s_add_u32 s17, s17, s20
 ; GFX7-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX7-NEXT:    s_lshl_b32 s19, s19, 31
+; GFX7-NEXT:    v_mul_hi_u32 v3, s1, v3
 ; GFX7-NEXT:    v_add_i32_e32 v5, vcc, s17, v5
-; GFX7-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX7-NEXT:    s_and_b32 s19, s19, 1
+; GFX7-NEXT:    v_mov_b32_e32 v6, s10
 ; GFX7-NEXT:    s_add_i32 s18, s18, s19
 ; GFX7-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; GFX7-NEXT:    v_add_i32_e32 v8, vcc, s18, v8
+; GFX7-NEXT:    v_mul_hi_u32 v7, s0, v6
 ; GFX7-NEXT:    s_mul_i32 s17, s4, s8
 ; GFX7-NEXT:    s_mul_i32 s18, s3, s9
-; GFX7-NEXT:    v_mul_hi_u32 v3, s1, v3
+; GFX7-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
 ; GFX7-NEXT:    s_add_u32 s17, s17, s18
+; GFX7-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX7-NEXT:    s_cselect_b32 s18, 1, 0
-; GFX7-NEXT:    s_lshl_b32 s18, s18, 31
-; GFX7-NEXT:    v_mov_b32_e32 v6, s10
-; GFX7-NEXT:    v_mul_hi_u32 v7, s0, v6
+; GFX7-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
 ; GFX7-NEXT:    s_mul_i32 s19, s2, s10
-; GFX7-NEXT:    s_lshr_b32 s18, s18, 31
-; GFX7-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
+; GFX7-NEXT:    s_and_b32 s18, s18, 1
+; GFX7-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
 ; GFX7-NEXT:    s_add_u32 s17, s17, s19
-; GFX7-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX7-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX7-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
-; GFX7-NEXT:    s_lshl_b32 s19, s19, 31
-; GFX7-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GFX7-NEXT:    s_lshr_b32 s19, s19, 31
 ; GFX7-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GFX7-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; GFX7-NEXT:    s_and_b32 s19, s19, 1
+; GFX7-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
 ; GFX7-NEXT:    s_mul_i32 s20, s1, s11
 ; GFX7-NEXT:    s_add_i32 s18, s18, s19
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
 ; GFX7-NEXT:    s_add_u32 s17, s17, s20
-; GFX7-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
 ; GFX7-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX7-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX7-NEXT:    s_lshl_b32 s19, s19, 31
 ; GFX7-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
 ; GFX7-NEXT:    v_mov_b32_e32 v5, s3
-; GFX7-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX7-NEXT:    s_and_b32 s19, s19, 1
+; GFX7-NEXT:    v_mul_hi_u32 v7, v5, s8
 ; GFX7-NEXT:    s_mul_i32 s21, s0, s12
 ; GFX7-NEXT:    s_add_i32 s18, s18, s19
-; GFX7-NEXT:    v_mul_hi_u32 v7, v5, s8
 ; GFX7-NEXT:    s_add_u32 s17, s17, s21
 ; GFX7-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX7-NEXT:    s_lshl_b32 s19, s19, 31
 ; GFX7-NEXT:    v_add_i32_e32 v7, vcc, s17, v7
-; GFX7-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX7-NEXT:    s_and_b32 s19, s19, 1
+; GFX7-NEXT:    v_mul_hi_u32 v4, v4, s9
 ; GFX7-NEXT:    s_add_i32 s18, s18, s19
 ; GFX7-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
 ; GFX7-NEXT:    v_add_i32_e32 v11, vcc, s18, v11
 ; GFX7-NEXT:    s_mul_i32 s17, s5, s8
 ; GFX7-NEXT:    s_mul_i32 s18, s4, s9
 ; GFX7-NEXT:    s_add_u32 s17, s17, s18
-; GFX7-NEXT:    s_cselect_b32 s18, 1, 0
-; GFX7-NEXT:    v_mul_hi_u32 v4, v4, s9
-; GFX7-NEXT:    s_lshl_b32 s18, s18, 31
-; GFX7-NEXT:    s_mul_i32 s19, s3, s10
-; GFX7-NEXT:    s_lshr_b32 s18, s18, 31
-; GFX7-NEXT:    s_add_u32 s17, s17, s19
-; GFX7-NEXT:    s_cselect_b32 s19, 1, 0
 ; GFX7-NEXT:    v_mul_hi_u32 v8, s1, v6
+; GFX7-NEXT:    s_cselect_b32 s18, 1, 0
 ; GFX7-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
-; GFX7-NEXT:    s_lshl_b32 s19, s19, 31
 ; GFX7-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; GFX7-NEXT:    v_mov_b32_e32 v9, s11
-; GFX7-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX7-NEXT:    s_mul_i32 s19, s3, s10
+; GFX7-NEXT:    s_and_b32 s18, s18, 1
 ; GFX7-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
+; GFX7-NEXT:    s_add_u32 s17, s17, s19
 ; GFX7-NEXT:    v_mul_hi_u32 v10, s0, v9
+; GFX7-NEXT:    s_cselect_b32 s19, 1, 0
+; GFX7-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GFX7-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GFX7-NEXT:    s_and_b32 s19, s19, 1
+; GFX7-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
 ; GFX7-NEXT:    s_mul_i32 s20, s2, s11
 ; GFX7-NEXT:    s_add_i32 s18, s18, s19
-; GFX7-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GFX7-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
 ; GFX7-NEXT:    s_add_u32 s17, s17, s20
 ; GFX7-NEXT:    s_cselect_b32 s19, 1, 0
 ; GFX7-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX7-NEXT:    s_lshl_b32 s19, s19, 31
 ; GFX7-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
-; GFX7-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
-; GFX7-NEXT:    s_lshr_b32 s19, s19, 31
-; GFX7-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GFX7-NEXT:    s_and_b32 s19, s19, 1
+; GFX7-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
 ; GFX7-NEXT:    s_mul_i32 s21, s1, s12
 ; GFX7-NEXT:    s_add_i32 s18, s18, s19
+; GFX7-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; GFX7-NEXT:    s_add_u32 s17, s17, s21
-; GFX7-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
 ; GFX7-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX7-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
-; GFX7-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX7-NEXT:    s_lshl_b32 s19, s19, 31
 ; GFX7-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
 ; GFX7-NEXT:    v_mov_b32_e32 v7, s4
-; GFX7-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX7-NEXT:    s_and_b32 s19, s19, 1
+; GFX7-NEXT:    v_mul_hi_u32 v8, v7, s8
 ; GFX7-NEXT:    s_mul_i32 s22, s0, s13
 ; GFX7-NEXT:    s_add_i32 s18, s18, s19
-; GFX7-NEXT:    v_mul_hi_u32 v8, v7, s8
 ; GFX7-NEXT:    s_add_u32 s17, s17, s22
 ; GFX7-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX7-NEXT:    s_lshl_b32 s19, s19, 31
 ; GFX7-NEXT:    v_add_i32_e32 v8, vcc, s17, v8
-; GFX7-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX7-NEXT:    s_and_b32 s19, s19, 1
+; GFX7-NEXT:    v_mul_hi_u32 v10, v5, s9
 ; GFX7-NEXT:    s_add_i32 s18, s18, s19
 ; GFX7-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
 ; GFX7-NEXT:    v_add_i32_e32 v14, vcc, s18, v14
@@ -955,61 +924,54 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) {
 ; GFX7-NEXT:    s_mul_i32 s18, s5, s9
 ; GFX7-NEXT:    s_add_u32 s17, s17, s18
 ; GFX7-NEXT:    s_cselect_b32 s18, 1, 0
-; GFX7-NEXT:    s_lshl_b32 s18, s18, 31
-; GFX7-NEXT:    s_mul_i32 s19, s4, s10
-; GFX7-NEXT:    s_lshr_b32 s18, s18, 31
-; GFX7-NEXT:    v_mul_hi_u32 v10, v5, s9
-; GFX7-NEXT:    s_add_u32 s17, s17, s19
-; GFX7-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX7-NEXT:    s_lshl_b32 s19, s19, 31
-; GFX7-NEXT:    s_lshr_b32 s19, s19, 31
 ; GFX7-NEXT:    v_mul_hi_u32 v6, s2, v6
 ; GFX7-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
-; GFX7-NEXT:    s_mul_i32 s20, s3, s11
-; GFX7-NEXT:    s_add_i32 s18, s18, s19
+; GFX7-NEXT:    s_mul_i32 s19, s4, s10
+; GFX7-NEXT:    s_and_b32 s18, s18, 1
 ; GFX7-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GFX7-NEXT:    s_add_u32 s17, s17, s20
+; GFX7-NEXT:    s_add_u32 s17, s17, s19
 ; GFX7-NEXT:    s_cselect_b32 s19, 1, 0
 ; GFX7-NEXT:    v_add_i32_e32 v10, vcc, v14, v10
 ; GFX7-NEXT:    v_mul_hi_u32 v11, s1, v9
 ; GFX7-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; GFX7-NEXT:    s_lshl_b32 s19, s19, 31
+; GFX7-NEXT:    s_and_b32 s19, s19, 1
 ; GFX7-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; GFX7-NEXT:    v_mov_b32_e32 v12, s12
-; GFX7-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX7-NEXT:    s_mul_i32 s20, s3, s11
+; GFX7-NEXT:    s_add_i32 s18, s18, s19
 ; GFX7-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; GFX7-NEXT:    s_add_u32 s17, s17, s20
 ; GFX7-NEXT:    v_mul_hi_u32 v13, s0, v12
+; GFX7-NEXT:    s_cselect_b32 s19, 1, 0
+; GFX7-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
+; GFX7-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GFX7-NEXT:    s_and_b32 s19, s19, 1
+; GFX7-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
 ; GFX7-NEXT:    s_mul_i32 s21, s2, s12
 ; GFX7-NEXT:    s_add_i32 s18, s18, s19
-; GFX7-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
+; GFX7-NEXT:    v_add_i32_e32 v6, vcc, v6, v13
 ; GFX7-NEXT:    s_add_u32 s17, s17, s21
 ; GFX7-NEXT:    s_cselect_b32 s19, 1, 0
 ; GFX7-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GFX7-NEXT:    s_lshl_b32 s19, s19, 31
 ; GFX7-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
-; GFX7-NEXT:    v_add_i32_e32 v6, vcc, v6, v13
-; GFX7-NEXT:    s_lshr_b32 s19, s19, 31
-; GFX7-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GFX7-NEXT:    s_and_b32 s19, s19, 1
+; GFX7-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
 ; GFX7-NEXT:    s_mul_i32 s22, s1, s13
 ; GFX7-NEXT:    s_add_i32 s18, s18, s19
-; GFX7-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GFX7-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GFX7-NEXT:    s_add_u32 s17, s17, s22
 ; GFX7-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX7-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
-; GFX7-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX7-NEXT:    s_lshl_b32 s19, s19, 31
 ; GFX7-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
 ; GFX7-NEXT:    v_mov_b32_e32 v8, s5
-; GFX7-NEXT:    s_lshr_b32 s19, s19, 31
 ; GFX7-NEXT:    v_mul_hi_u32 v10, v8, s8
+; GFX7-NEXT:    s_and_b32 s19, s19, 1
 ; GFX7-NEXT:    s_mul_i32 s23, s0, s14
 ; GFX7-NEXT:    s_add_i32 s18, s18, s19
 ; GFX7-NEXT:    s_add_u32 s17, s17, s23
 ; GFX7-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX7-NEXT:    s_lshl_b32 s19, s19, 31
 ; GFX7-NEXT:    v_mul_hi_u32 v11, v7, s9
 ; GFX7-NEXT:    v_add_i32_e32 v10, vcc, s17, v10
-; GFX7-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX7-NEXT:    s_and_b32 s19, s19, 1
 ; GFX7-NEXT:    s_add_i32 s18, s18, s19
 ; GFX7-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
 ; GFX7-NEXT:    v_add_i32_e32 v17, vcc, s18, v17
@@ -1087,148 +1049,134 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) {
 ; GFX8-NEXT:    s_mul_i32 s18, s0, s9
 ; GFX8-NEXT:    s_add_u32 s17, s17, s18
 ; GFX8-NEXT:    s_cselect_b32 s18, 1, 0
-; GFX8-NEXT:    s_lshl_b32 s18, s18, 31
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s17, v0
-; GFX8-NEXT:    s_lshr_b32 s18, s18, 31
+; GFX8-NEXT:    s_and_b32 s18, s18, 1
 ; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, s18, v1
 ; GFX8-NEXT:    s_mul_i32 s17, s2, s8
 ; GFX8-NEXT:    s_mul_i32 s18, s1, s9
+; GFX8-NEXT:    v_mov_b32_e32 v2, s1
 ; GFX8-NEXT:    s_add_u32 s17, s17, s18
 ; GFX8-NEXT:    s_cselect_b32 s18, 1, 0
-; GFX8-NEXT:    v_mov_b32_e32 v2, s1
-; GFX8-NEXT:    s_lshl_b32 s18, s18, 31
-; GFX8-NEXT:    s_mul_i32 s19, s0, s10
-; GFX8-NEXT:    s_lshr_b32 s18, s18, 31
 ; GFX8-NEXT:    v_mul_hi_u32 v2, v2, s8
+; GFX8-NEXT:    s_mul_i32 s19, s0, s10
+; GFX8-NEXT:    s_and_b32 s18, s18, 1
 ; GFX8-NEXT:    s_add_u32 s17, s17, s19
-; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX8-NEXT:    s_lshl_b32 s19, s19, 31
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, s17, v2
 ; GFX8-NEXT:    v_mov_b32_e32 v3, s9
-; GFX8-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
 ; GFX8-NEXT:    v_mul_hi_u32 v4, s0, v3
+; GFX8-NEXT:    v_add_u32_e32 v2, vcc, s17, v2
+; GFX8-NEXT:    s_and_b32 s19, s19, 1
 ; GFX8-NEXT:    s_add_i32 s18, s18, s19
 ; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v5, vcc, s18, v5
+; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
 ; GFX8-NEXT:    s_mul_i32 s17, s3, s8
 ; GFX8-NEXT:    s_mul_i32 s18, s2, s9
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; GFX8-NEXT:    s_add_u32 s17, s17, s18
 ; GFX8-NEXT:    s_cselect_b32 s18, 1, 0
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
-; GFX8-NEXT:    s_lshl_b32 s18, s18, 31
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX8-NEXT:    s_mul_i32 s19, s1, s10
-; GFX8-NEXT:    s_lshr_b32 s18, s18, 31
-; GFX8-NEXT:    s_add_u32 s17, s17, s19
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v5, v4
-; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v2, v1
+; GFX8-NEXT:    s_mul_i32 s19, s1, s10
+; GFX8-NEXT:    s_and_b32 s18, s18, 1
 ; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    s_lshl_b32 s19, s19, 31
+; GFX8-NEXT:    s_add_u32 s17, s17, s19
+; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v4, v2
 ; GFX8-NEXT:    v_mov_b32_e32 v4, s2
-; GFX8-NEXT:    s_lshr_b32 s19, s19, 31
 ; GFX8-NEXT:    v_mul_hi_u32 v5, v4, s8
+; GFX8-NEXT:    s_and_b32 s19, s19, 1
 ; GFX8-NEXT:    s_mul_i32 s20, s0, s11
 ; GFX8-NEXT:    s_add_i32 s18, s18, s19
 ; GFX8-NEXT:    s_add_u32 s17, s17, s20
 ; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX8-NEXT:    s_lshl_b32 s19, s19, 31
+; GFX8-NEXT:    v_mul_hi_u32 v3, s1, v3
 ; GFX8-NEXT:    v_add_u32_e32 v5, vcc, s17, v5
-; GFX8-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX8-NEXT:    s_and_b32 s19, s19, 1
+; GFX8-NEXT:    v_mov_b32_e32 v6, s10
 ; GFX8-NEXT:    s_add_i32 s18, s18, s19
 ; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v8, vcc, s18, v8
+; GFX8-NEXT:    v_mul_hi_u32 v7, s0, v6
 ; GFX8-NEXT:    s_mul_i32 s17, s4, s8
 ; GFX8-NEXT:    s_mul_i32 s18, s3, s9
-; GFX8-NEXT:    v_mul_hi_u32 v3, s1, v3
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v5, v3
 ; GFX8-NEXT:    s_add_u32 s17, s17, s18
+; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX8-NEXT:    s_cselect_b32 s18, 1, 0
-; GFX8-NEXT:    s_lshl_b32 s18, s18, 31
-; GFX8-NEXT:    v_mov_b32_e32 v6, s10
-; GFX8-NEXT:    v_mul_hi_u32 v7, s0, v6
+; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v8, v5
 ; GFX8-NEXT:    s_mul_i32 s19, s2, s10
-; GFX8-NEXT:    s_lshr_b32 s18, s18, 31
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v5, v3
+; GFX8-NEXT:    s_and_b32 s18, s18, 1
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v7
 ; GFX8-NEXT:    s_add_u32 s17, s17, s19
-; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v8, v5
-; GFX8-NEXT:    s_lshl_b32 s19, s19, 31
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v7
-; GFX8-NEXT:    s_lshr_b32 s19, s19, 31
 ; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v5, v7
+; GFX8-NEXT:    s_and_b32 s19, s19, 1
+; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
 ; GFX8-NEXT:    s_mul_i32 s20, s1, s11
 ; GFX8-NEXT:    s_add_i32 s18, s18, s19
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
 ; GFX8-NEXT:    s_add_u32 s17, s17, s20
-; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v5, v7
 ; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT:    s_lshl_b32 s19, s19, 31
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v5, v3
 ; GFX8-NEXT:    v_mov_b32_e32 v5, s3
-; GFX8-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX8-NEXT:    s_and_b32 s19, s19, 1
+; GFX8-NEXT:    v_mul_hi_u32 v7, v5, s8
 ; GFX8-NEXT:    s_mul_i32 s21, s0, s12
 ; GFX8-NEXT:    s_add_i32 s18, s18, s19
-; GFX8-NEXT:    v_mul_hi_u32 v7, v5, s8
 ; GFX8-NEXT:    s_add_u32 s17, s17, s21
 ; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX8-NEXT:    s_lshl_b32 s19, s19, 31
 ; GFX8-NEXT:    v_add_u32_e32 v7, vcc, s17, v7
-; GFX8-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX8-NEXT:    s_and_b32 s19, s19, 1
+; GFX8-NEXT:    v_mul_hi_u32 v4, v4, s9
 ; GFX8-NEXT:    s_add_i32 s18, s18, s19
 ; GFX8-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v11, vcc, s18, v11
 ; GFX8-NEXT:    s_mul_i32 s17, s5, s8
 ; GFX8-NEXT:    s_mul_i32 s18, s4, s9
 ; GFX8-NEXT:    s_add_u32 s17, s17, s18
-; GFX8-NEXT:    s_cselect_b32 s18, 1, 0
-; GFX8-NEXT:    v_mul_hi_u32 v4, v4, s9
-; GFX8-NEXT:    s_lshl_b32 s18, s18, 31
-; GFX8-NEXT:    s_mul_i32 s19, s3, s10
-; GFX8-NEXT:    s_lshr_b32 s18, s18, 31
-; GFX8-NEXT:    s_add_u32 s17, s17, s19
-; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
 ; GFX8-NEXT:    v_mul_hi_u32 v8, s1, v6
+; GFX8-NEXT:    s_cselect_b32 s18, 1, 0
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v7, v4
-; GFX8-NEXT:    s_lshl_b32 s19, s19, 31
 ; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; GFX8-NEXT:    v_mov_b32_e32 v9, s11
-; GFX8-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX8-NEXT:    s_mul_i32 s19, s3, s10
+; GFX8-NEXT:    s_and_b32 s18, s18, 1
 ; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v11, v7
+; GFX8-NEXT:    s_add_u32 s17, s17, s19
 ; GFX8-NEXT:    v_mul_hi_u32 v10, s0, v9
+; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v4, v8
+; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GFX8-NEXT:    s_and_b32 s19, s19, 1
+; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v7, v8
 ; GFX8-NEXT:    s_mul_i32 s20, s2, s11
 ; GFX8-NEXT:    s_add_i32 s18, s18, s19
-; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v4, v8
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v4, v10
 ; GFX8-NEXT:    s_add_u32 s17, s17, s20
 ; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
 ; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX8-NEXT:    s_lshl_b32 s19, s19, 31
 ; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v7, v8
-; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v4, v10
-; GFX8-NEXT:    s_lshr_b32 s19, s19, 31
-; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GFX8-NEXT:    s_and_b32 s19, s19, 1
+; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v4, v3
 ; GFX8-NEXT:    s_mul_i32 s21, s1, s12
 ; GFX8-NEXT:    s_add_i32 s18, s18, s19
+; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; GFX8-NEXT:    s_add_u32 s17, s17, s21
-; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v7, v8
 ; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v4, v3
-; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX8-NEXT:    s_lshl_b32 s19, s19, 31
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v7, v4
 ; GFX8-NEXT:    v_mov_b32_e32 v7, s4
-; GFX8-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX8-NEXT:    s_and_b32 s19, s19, 1
+; GFX8-NEXT:    v_mul_hi_u32 v8, v7, s8
 ; GFX8-NEXT:    s_mul_i32 s22, s0, s13
 ; GFX8-NEXT:    s_add_i32 s18, s18, s19
-; GFX8-NEXT:    v_mul_hi_u32 v8, v7, s8
 ; GFX8-NEXT:    s_add_u32 s17, s17, s22
 ; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX8-NEXT:    s_lshl_b32 s19, s19, 31
 ; GFX8-NEXT:    v_add_u32_e32 v8, vcc, s17, v8
-; GFX8-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX8-NEXT:    s_and_b32 s19, s19, 1
+; GFX8-NEXT:    v_mul_hi_u32 v10, v5, s9
 ; GFX8-NEXT:    s_add_i32 s18, s18, s19
 ; GFX8-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v14, vcc, s18, v14
@@ -1236,61 +1184,54 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) {
 ; GFX8-NEXT:    s_mul_i32 s18, s5, s9
 ; GFX8-NEXT:    s_add_u32 s17, s17, s18
 ; GFX8-NEXT:    s_cselect_b32 s18, 1, 0
-; GFX8-NEXT:    s_lshl_b32 s18, s18, 31
-; GFX8-NEXT:    s_mul_i32 s19, s4, s10
-; GFX8-NEXT:    s_lshr_b32 s18, s18, 31
-; GFX8-NEXT:    v_mul_hi_u32 v10, v5, s9
-; GFX8-NEXT:    s_add_u32 s17, s17, s19
-; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX8-NEXT:    s_lshl_b32 s19, s19, 31
-; GFX8-NEXT:    s_lshr_b32 s19, s19, 31
 ; GFX8-NEXT:    v_mul_hi_u32 v6, s2, v6
 ; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v8, v10
-; GFX8-NEXT:    s_mul_i32 s20, s3, s11
-; GFX8-NEXT:    s_add_i32 s18, s18, s19
+; GFX8-NEXT:    s_mul_i32 s19, s4, s10
+; GFX8-NEXT:    s_and_b32 s18, s18, 1
 ; GFX8-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GFX8-NEXT:    s_add_u32 s17, s17, s20
+; GFX8-NEXT:    s_add_u32 s17, s17, s19
 ; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
 ; GFX8-NEXT:    v_add_u32_e32 v10, vcc, v14, v10
 ; GFX8-NEXT:    v_mul_hi_u32 v11, s1, v9
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v8, v6
-; GFX8-NEXT:    s_lshl_b32 s19, s19, 31
+; GFX8-NEXT:    s_and_b32 s19, s19, 1
 ; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; GFX8-NEXT:    v_mov_b32_e32 v12, s12
-; GFX8-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX8-NEXT:    s_mul_i32 s20, s3, s11
+; GFX8-NEXT:    s_add_i32 s18, s18, s19
 ; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v10, v8
+; GFX8-NEXT:    s_add_u32 s17, s17, s20
 ; GFX8-NEXT:    v_mul_hi_u32 v13, s0, v12
+; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
+; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v11
+; GFX8-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GFX8-NEXT:    s_and_b32 s19, s19, 1
+; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v8, v10
 ; GFX8-NEXT:    s_mul_i32 s21, s2, s12
 ; GFX8-NEXT:    s_add_i32 s18, s18, s19
-; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v11
+; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v13
 ; GFX8-NEXT:    s_add_u32 s17, s17, s21
 ; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
 ; GFX8-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GFX8-NEXT:    s_lshl_b32 s19, s19, 31
 ; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v8, v10
-; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v13
-; GFX8-NEXT:    s_lshr_b32 s19, s19, 31
-; GFX8-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GFX8-NEXT:    s_and_b32 s19, s19, 1
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v6, v4
 ; GFX8-NEXT:    s_mul_i32 s22, s1, s13
 ; GFX8-NEXT:    s_add_i32 s18, s18, s19
-; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v8, v10
+; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GFX8-NEXT:    s_add_u32 s17, s17, s22
 ; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v6, v4
-; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT:    s_lshl_b32 s19, s19, 31
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v8, v6
 ; GFX8-NEXT:    v_mov_b32_e32 v8, s5
-; GFX8-NEXT:    s_lshr_b32 s19, s19, 31
 ; GFX8-NEXT:    v_mul_hi_u32 v10, v8, s8
+; GFX8-NEXT:    s_and_b32 s19, s19, 1
 ; GFX8-NEXT:    s_mul_i32 s23, s0, s14
 ; GFX8-NEXT:    s_add_i32 s18, s18, s19
 ; GFX8-NEXT:    s_add_u32 s17, s17, s23
 ; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX8-NEXT:    s_lshl_b32 s19, s19, 31
 ; GFX8-NEXT:    v_mul_hi_u32 v11, v7, s9
 ; GFX8-NEXT:    v_add_u32_e32 v10, vcc, s17, v10
-; GFX8-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX8-NEXT:    s_and_b32 s19, s19, 1
 ; GFX8-NEXT:    s_add_i32 s18, s18, s19
 ; GFX8-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v17, vcc, s18, v17
@@ -1362,283 +1303,235 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) {
 ;
 ; GFX9-LABEL: s_mul_i256:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s16, s0
 ; GFX9-NEXT:    s_mul_i32 s17, s1, s8
-; GFX9-NEXT:    s_mul_i32 s18, s16, s9
+; GFX9-NEXT:    s_mul_i32 s18, s0, s9
 ; GFX9-NEXT:    s_add_u32 s17, s17, s18
 ; GFX9-NEXT:    s_cselect_b32 s18, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s18, s18, 31
-; GFX9-NEXT:    s_mul_hi_u32 s19, s16, s8
-; GFX9-NEXT:    s_lshr_b32 s18, s18, 31
+; GFX9-NEXT:    s_mul_hi_u32 s19, s0, s8
+; GFX9-NEXT:    s_and_b32 s18, s18, 1
 ; GFX9-NEXT:    s_add_u32 s17, s17, s19
 ; GFX9-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s19, s19, 31
-; GFX9-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX9-NEXT:    s_and_b32 s19, s19, 1
 ; GFX9-NEXT:    s_add_i32 s18, s18, s19
 ; GFX9-NEXT:    s_mul_i32 s19, s2, s8
 ; GFX9-NEXT:    s_mul_i32 s20, s1, s9
 ; GFX9-NEXT:    s_add_u32 s19, s19, s20
 ; GFX9-NEXT:    s_cselect_b32 s20, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s20, s20, 31
-; GFX9-NEXT:    s_mul_i32 s21, s16, s10
-; GFX9-NEXT:    s_lshr_b32 s20, s20, 31
+; GFX9-NEXT:    s_mul_i32 s21, s0, s10
+; GFX9-NEXT:    s_and_b32 s20, s20, 1
 ; GFX9-NEXT:    s_add_u32 s19, s19, s21
 ; GFX9-NEXT:    s_cselect_b32 s21, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s21, s21, 31
-; GFX9-NEXT:    s_lshr_b32 s21, s21, 31
+; GFX9-NEXT:    s_and_b32 s21, s21, 1
 ; GFX9-NEXT:    s_mul_hi_u32 s22, s1, s8
 ; GFX9-NEXT:    s_add_i32 s20, s20, s21
 ; GFX9-NEXT:    s_add_u32 s19, s19, s22
 ; GFX9-NEXT:    s_cselect_b32 s21, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s21, s21, 31
-; GFX9-NEXT:    s_lshr_b32 s21, s21, 31
-; GFX9-NEXT:    s_mul_hi_u32 s23, s16, s9
+; GFX9-NEXT:    s_and_b32 s21, s21, 1
+; GFX9-NEXT:    s_mul_hi_u32 s23, s0, s9
 ; GFX9-NEXT:    s_add_i32 s20, s20, s21
 ; GFX9-NEXT:    s_add_u32 s19, s19, s23
 ; GFX9-NEXT:    s_cselect_b32 s21, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s21, s21, 31
-; GFX9-NEXT:    s_lshr_b32 s21, s21, 31
+; GFX9-NEXT:    s_and_b32 s21, s21, 1
 ; GFX9-NEXT:    s_add_i32 s20, s20, s21
 ; GFX9-NEXT:    s_add_u32 s18, s19, s18
 ; GFX9-NEXT:    s_cselect_b32 s19, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s19, s19, 31
-; GFX9-NEXT:    s_lshr_b32 s19, s19, 31
+; GFX9-NEXT:    s_and_b32 s19, s19, 1
 ; GFX9-NEXT:    s_add_i32 s20, s20, s19
 ; GFX9-NEXT:    s_mul_i32 s19, s3, s8
 ; GFX9-NEXT:    s_mul_i32 s21, s2, s9
 ; GFX9-NEXT:    s_add_u32 s19, s19, s21
 ; GFX9-NEXT:    s_cselect_b32 s21, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s21, s21, 31
 ; GFX9-NEXT:    s_mul_i32 s22, s1, s10
-; GFX9-NEXT:    s_lshr_b32 s21, s21, 31
+; GFX9-NEXT:    s_and_b32 s21, s21, 1
 ; GFX9-NEXT:    s_add_u32 s19, s19, s22
 ; GFX9-NEXT:    s_cselect_b32 s22, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s22, s22, 31
-; GFX9-NEXT:    s_lshr_b32 s22, s22, 31
-; GFX9-NEXT:    s_mul_i32 s23, s16, s11
+; GFX9-NEXT:    s_and_b32 s22, s22, 1
+; GFX9-NEXT:    s_mul_i32 s23, s0, s11
 ; GFX9-NEXT:    s_add_i32 s21, s21, s22
 ; GFX9-NEXT:    s_add_u32 s19, s19, s23
 ; GFX9-NEXT:    s_cselect_b32 s22, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s22, s22, 31
-; GFX9-NEXT:    s_lshr_b32 s22, s22, 31
+; GFX9-NEXT:    s_and_b32 s22, s22, 1
 ; GFX9-NEXT:    s_mul_hi_u32 s24, s2, s8
 ; GFX9-NEXT:    s_add_i32 s21, s21, s22
 ; GFX9-NEXT:    s_add_u32 s19, s19, s24
 ; GFX9-NEXT:    s_cselect_b32 s22, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s22, s22, 31
-; GFX9-NEXT:    s_lshr_b32 s22, s22, 31
+; GFX9-NEXT:    s_and_b32 s22, s22, 1
 ; GFX9-NEXT:    s_mul_hi_u32 s25, s1, s9
 ; GFX9-NEXT:    s_add_i32 s21, s21, s22
 ; GFX9-NEXT:    s_add_u32 s19, s19, s25
 ; GFX9-NEXT:    s_cselect_b32 s22, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s22, s22, 31
-; GFX9-NEXT:    s_lshr_b32 s22, s22, 31
-; GFX9-NEXT:    s_mul_hi_u32 s26, s16, s10
+; GFX9-NEXT:    s_and_b32 s22, s22, 1
+; GFX9-NEXT:    s_mul_hi_u32 s26, s0, s10
 ; GFX9-NEXT:    s_add_i32 s21, s21, s22
 ; GFX9-NEXT:    s_add_u32 s19, s19, s26
 ; GFX9-NEXT:    s_cselect_b32 s22, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s22, s22, 31
-; GFX9-NEXT:    s_lshr_b32 s22, s22, 31
+; GFX9-NEXT:    s_and_b32 s22, s22, 1
 ; GFX9-NEXT:    s_add_i32 s21, s21, s22
 ; GFX9-NEXT:    s_add_u32 s19, s19, s20
 ; GFX9-NEXT:    s_cselect_b32 s20, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s20, s20, 31
-; GFX9-NEXT:    s_lshr_b32 s20, s20, 31
+; GFX9-NEXT:    s_and_b32 s20, s20, 1
 ; GFX9-NEXT:    s_add_i32 s21, s21, s20
 ; GFX9-NEXT:    s_mul_i32 s20, s4, s8
 ; GFX9-NEXT:    s_mul_i32 s22, s3, s9
 ; GFX9-NEXT:    s_add_u32 s20, s20, s22
 ; GFX9-NEXT:    s_cselect_b32 s22, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s22, s22, 31
 ; GFX9-NEXT:    s_mul_i32 s23, s2, s10
-; GFX9-NEXT:    s_lshr_b32 s22, s22, 31
+; GFX9-NEXT:    s_and_b32 s22, s22, 1
 ; GFX9-NEXT:    s_add_u32 s20, s20, s23
 ; GFX9-NEXT:    s_cselect_b32 s23, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s23, s23, 31
-; GFX9-NEXT:    s_lshr_b32 s23, s23, 31
+; GFX9-NEXT:    s_and_b32 s23, s23, 1
 ; GFX9-NEXT:    s_mul_i32 s24, s1, s11
 ; GFX9-NEXT:    s_add_i32 s22, s22, s23
 ; GFX9-NEXT:    s_add_u32 s20, s20, s24
 ; GFX9-NEXT:    s_cselect_b32 s23, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s23, s23, 31
-; GFX9-NEXT:    s_lshr_b32 s23, s23, 31
-; GFX9-NEXT:    s_mul_i32 s25, s16, s12
+; GFX9-NEXT:    s_and_b32 s23, s23, 1
+; GFX9-NEXT:    s_mul_i32 s25, s0, s12
 ; GFX9-NEXT:    s_add_i32 s22, s22, s23
 ; GFX9-NEXT:    s_add_u32 s20, s20, s25
 ; GFX9-NEXT:    s_cselect_b32 s23, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s23, s23, 31
-; GFX9-NEXT:    s_lshr_b32 s23, s23, 31
+; GFX9-NEXT:    s_and_b32 s23, s23, 1
 ; GFX9-NEXT:    s_mul_hi_u32 s26, s3, s8
 ; GFX9-NEXT:    s_add_i32 s22, s22, s23
 ; GFX9-NEXT:    s_add_u32 s20, s20, s26
 ; GFX9-NEXT:    s_cselect_b32 s23, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s23, s23, 31
-; GFX9-NEXT:    s_lshr_b32 s23, s23, 31
+; GFX9-NEXT:    s_and_b32 s23, s23, 1
 ; GFX9-NEXT:    s_mul_hi_u32 s27, s2, s9
 ; GFX9-NEXT:    s_add_i32 s22, s22, s23
 ; GFX9-NEXT:    s_add_u32 s20, s20, s27
 ; GFX9-NEXT:    s_cselect_b32 s23, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s23, s23, 31
-; GFX9-NEXT:    s_lshr_b32 s23, s23, 31
+; GFX9-NEXT:    s_and_b32 s23, s23, 1
 ; GFX9-NEXT:    s_mul_hi_u32 s28, s1, s10
 ; GFX9-NEXT:    s_add_i32 s22, s22, s23
 ; GFX9-NEXT:    s_add_u32 s20, s20, s28
 ; GFX9-NEXT:    s_cselect_b32 s23, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s23, s23, 31
-; GFX9-NEXT:    s_lshr_b32 s23, s23, 31
-; GFX9-NEXT:    s_mul_hi_u32 s29, s16, s11
+; GFX9-NEXT:    s_and_b32 s23, s23, 1
+; GFX9-NEXT:    s_mul_hi_u32 s29, s0, s11
 ; GFX9-NEXT:    s_add_i32 s22, s22, s23
 ; GFX9-NEXT:    s_add_u32 s20, s20, s29
 ; GFX9-NEXT:    s_cselect_b32 s23, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s23, s23, 31
-; GFX9-NEXT:    s_lshr_b32 s23, s23, 31
+; GFX9-NEXT:    s_and_b32 s23, s23, 1
 ; GFX9-NEXT:    s_add_i32 s22, s22, s23
 ; GFX9-NEXT:    s_add_u32 s20, s20, s21
 ; GFX9-NEXT:    s_cselect_b32 s21, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s21, s21, 31
-; GFX9-NEXT:    s_lshr_b32 s21, s21, 31
+; GFX9-NEXT:    s_and_b32 s21, s21, 1
 ; GFX9-NEXT:    s_add_i32 s22, s22, s21
 ; GFX9-NEXT:    s_mul_i32 s21, s5, s8
 ; GFX9-NEXT:    s_mul_i32 s23, s4, s9
 ; GFX9-NEXT:    s_add_u32 s21, s21, s23
 ; GFX9-NEXT:    s_cselect_b32 s23, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s23, s23, 31
 ; GFX9-NEXT:    s_mul_i32 s24, s3, s10
-; GFX9-NEXT:    s_lshr_b32 s23, s23, 31
+; GFX9-NEXT:    s_and_b32 s23, s23, 1
 ; GFX9-NEXT:    s_add_u32 s21, s21, s24
 ; GFX9-NEXT:    s_cselect_b32 s24, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s24, s24, 31
-; GFX9-NEXT:    s_lshr_b32 s24, s24, 31
+; GFX9-NEXT:    s_and_b32 s24, s24, 1
 ; GFX9-NEXT:    s_mul_i32 s25, s2, s11
 ; GFX9-NEXT:    s_add_i32 s23, s23, s24
 ; GFX9-NEXT:    s_add_u32 s21, s21, s25
 ; GFX9-NEXT:    s_cselect_b32 s24, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s24, s24, 31
-; GFX9-NEXT:    s_lshr_b32 s24, s24, 31
+; GFX9-NEXT:    s_and_b32 s24, s24, 1
 ; GFX9-NEXT:    s_mul_i32 s26, s1, s12
 ; GFX9-NEXT:    s_add_i32 s23, s23, s24
 ; GFX9-NEXT:    s_add_u32 s21, s21, s26
 ; GFX9-NEXT:    s_cselect_b32 s24, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s24, s24, 31
-; GFX9-NEXT:    s_lshr_b32 s24, s24, 31
-; GFX9-NEXT:    s_mul_i32 s27, s16, s13
+; GFX9-NEXT:    s_and_b32 s24, s24, 1
+; GFX9-NEXT:    s_mul_i32 s27, s0, s13
 ; GFX9-NEXT:    s_add_i32 s23, s23, s24
 ; GFX9-NEXT:    s_add_u32 s21, s21, s27
 ; GFX9-NEXT:    s_cselect_b32 s24, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s24, s24, 31
-; GFX9-NEXT:    s_lshr_b32 s24, s24, 31
+; GFX9-NEXT:    s_and_b32 s24, s24, 1
 ; GFX9-NEXT:    s_mul_hi_u32 s28, s4, s8
 ; GFX9-NEXT:    s_add_i32 s23, s23, s24
 ; GFX9-NEXT:    s_add_u32 s21, s21, s28
 ; GFX9-NEXT:    s_cselect_b32 s24, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s24, s24, 31
-; GFX9-NEXT:    s_lshr_b32 s24, s24, 31
+; GFX9-NEXT:    s_and_b32 s24, s24, 1
 ; GFX9-NEXT:    s_mul_hi_u32 s29, s3, s9
 ; GFX9-NEXT:    s_add_i32 s23, s23, s24
 ; GFX9-NEXT:    s_add_u32 s21, s21, s29
 ; GFX9-NEXT:    s_cselect_b32 s24, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s24, s24, 31
-; GFX9-NEXT:    s_lshr_b32 s24, s24, 31
+; GFX9-NEXT:    s_and_b32 s24, s24, 1
 ; GFX9-NEXT:    s_mul_hi_u32 s30, s2, s10
 ; GFX9-NEXT:    s_add_i32 s23, s23, s24
 ; GFX9-NEXT:    s_add_u32 s21, s21, s30
 ; GFX9-NEXT:    s_cselect_b32 s24, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s24, s24, 31
-; GFX9-NEXT:    s_lshr_b32 s24, s24, 31
+; GFX9-NEXT:    s_and_b32 s24, s24, 1
 ; GFX9-NEXT:    s_mul_hi_u32 s31, s1, s11
 ; GFX9-NEXT:    s_add_i32 s23, s23, s24
 ; GFX9-NEXT:    s_add_u32 s21, s21, s31
 ; GFX9-NEXT:    s_cselect_b32 s24, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s24, s24, 31
-; GFX9-NEXT:    s_lshr_b32 s24, s24, 31
-; GFX9-NEXT:    s_mul_hi_u32 s32, s16, s12
+; GFX9-NEXT:    s_and_b32 s24, s24, 1
+; GFX9-NEXT:    s_mul_hi_u32 s32, s0, s12
 ; GFX9-NEXT:    s_add_i32 s23, s23, s24
 ; GFX9-NEXT:    s_add_u32 s21, s21, s32
 ; GFX9-NEXT:    s_cselect_b32 s24, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s24, s24, 31
-; GFX9-NEXT:    s_lshr_b32 s24, s24, 31
+; GFX9-NEXT:    s_and_b32 s24, s24, 1
 ; GFX9-NEXT:    s_add_i32 s23, s23, s24
 ; GFX9-NEXT:    s_add_u32 s21, s21, s22
 ; GFX9-NEXT:    s_cselect_b32 s22, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s22, s22, 31
-; GFX9-NEXT:    s_lshr_b32 s22, s22, 31
+; GFX9-NEXT:    s_and_b32 s22, s22, 1
 ; GFX9-NEXT:    s_add_i32 s23, s23, s22
 ; GFX9-NEXT:    s_mul_i32 s22, s6, s8
 ; GFX9-NEXT:    s_mul_i32 s24, s5, s9
 ; GFX9-NEXT:    s_add_u32 s22, s22, s24
 ; GFX9-NEXT:    s_cselect_b32 s24, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s24, s24, 31
 ; GFX9-NEXT:    s_mul_i32 s25, s4, s10
-; GFX9-NEXT:    s_lshr_b32 s24, s24, 31
+; GFX9-NEXT:    s_and_b32 s24, s24, 1
 ; GFX9-NEXT:    s_add_u32 s22, s22, s25
 ; GFX9-NEXT:    s_cselect_b32 s25, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s25, s25, 31
-; GFX9-NEXT:    s_lshr_b32 s25, s25, 31
+; GFX9-NEXT:    s_and_b32 s25, s25, 1
 ; GFX9-NEXT:    s_mul_i32 s26, s3, s11
 ; GFX9-NEXT:    s_add_i32 s24, s24, s25
 ; GFX9-NEXT:    s_add_u32 s22, s22, s26
 ; GFX9-NEXT:    s_cselect_b32 s25, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s25, s25, 31
-; GFX9-NEXT:    s_lshr_b32 s25, s25, 31
+; GFX9-NEXT:    s_and_b32 s25, s25, 1
 ; GFX9-NEXT:    s_mul_i32 s27, s2, s12
 ; GFX9-NEXT:    s_add_i32 s24, s24, s25
 ; GFX9-NEXT:    s_add_u32 s22, s22, s27
 ; GFX9-NEXT:    s_cselect_b32 s25, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s25, s25, 31
-; GFX9-NEXT:    s_lshr_b32 s25, s25, 31
+; GFX9-NEXT:    s_and_b32 s25, s25, 1
 ; GFX9-NEXT:    s_mul_i32 s28, s1, s13
 ; GFX9-NEXT:    s_add_i32 s24, s24, s25
 ; GFX9-NEXT:    s_add_u32 s22, s22, s28
 ; GFX9-NEXT:    s_cselect_b32 s25, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s25, s25, 31
-; GFX9-NEXT:    s_lshr_b32 s25, s25, 31
-; GFX9-NEXT:    s_mul_i32 s29, s16, s14
+; GFX9-NEXT:    s_and_b32 s25, s25, 1
+; GFX9-NEXT:    s_mul_i32 s29, s0, s14
 ; GFX9-NEXT:    s_add_i32 s24, s24, s25
 ; GFX9-NEXT:    s_add_u32 s22, s22, s29
 ; GFX9-NEXT:    s_cselect_b32 s25, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s25, s25, 31
-; GFX9-NEXT:    s_lshr_b32 s25, s25, 31
+; GFX9-NEXT:    s_and_b32 s25, s25, 1
 ; GFX9-NEXT:    s_mul_hi_u32 s30, s5, s8
 ; GFX9-NEXT:    s_add_i32 s24, s24, s25
 ; GFX9-NEXT:    s_add_u32 s22, s22, s30
 ; GFX9-NEXT:    s_cselect_b32 s25, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s25, s25, 31
-; GFX9-NEXT:    s_lshr_b32 s25, s25, 31
+; GFX9-NEXT:    s_and_b32 s25, s25, 1
 ; GFX9-NEXT:    s_mul_hi_u32 s31, s4, s9
 ; GFX9-NEXT:    s_add_i32 s24, s24, s25
 ; GFX9-NEXT:    s_add_u32 s22, s22, s31
 ; GFX9-NEXT:    s_cselect_b32 s25, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s25, s25, 31
-; GFX9-NEXT:    s_lshr_b32 s25, s25, 31
+; GFX9-NEXT:    s_and_b32 s25, s25, 1
 ; GFX9-NEXT:    s_mul_hi_u32 s32, s3, s10
 ; GFX9-NEXT:    s_add_i32 s24, s24, s25
 ; GFX9-NEXT:    s_add_u32 s22, s22, s32
 ; GFX9-NEXT:    s_cselect_b32 s25, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s25, s25, 31
-; GFX9-NEXT:    s_lshr_b32 s25, s25, 31
+; GFX9-NEXT:    s_and_b32 s25, s25, 1
 ; GFX9-NEXT:    s_mul_hi_u32 s33, s2, s11
 ; GFX9-NEXT:    s_add_i32 s24, s24, s25
 ; GFX9-NEXT:    s_add_u32 s22, s22, s33
 ; GFX9-NEXT:    s_cselect_b32 s25, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s25, s25, 31
-; GFX9-NEXT:    s_lshr_b32 s25, s25, 31
+; GFX9-NEXT:    s_and_b32 s25, s25, 1
 ; GFX9-NEXT:    s_mul_hi_u32 s34, s1, s12
 ; GFX9-NEXT:    s_add_i32 s24, s24, s25
 ; GFX9-NEXT:    s_add_u32 s22, s22, s34
 ; GFX9-NEXT:    s_cselect_b32 s25, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s25, s25, 31
-; GFX9-NEXT:    s_lshr_b32 s25, s25, 31
-; GFX9-NEXT:    s_mul_hi_u32 s35, s16, s13
+; GFX9-NEXT:    s_and_b32 s25, s25, 1
+; GFX9-NEXT:    s_mul_hi_u32 s35, s0, s13
 ; GFX9-NEXT:    s_add_i32 s24, s24, s25
 ; GFX9-NEXT:    s_add_u32 s22, s22, s35
 ; GFX9-NEXT:    s_cselect_b32 s25, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s25, s25, 31
-; GFX9-NEXT:    s_lshr_b32 s25, s25, 31
+; GFX9-NEXT:    s_and_b32 s25, s25, 1
 ; GFX9-NEXT:    s_add_i32 s24, s24, s25
 ; GFX9-NEXT:    s_add_u32 s22, s22, s23
 ; GFX9-NEXT:    s_cselect_b32 s23, 1, 0
-; GFX9-NEXT:    s_lshl_b32 s23, s23, 31
-; GFX9-NEXT:    s_lshr_b32 s23, s23, 31
+; GFX9-NEXT:    s_and_b32 s23, s23, 1
 ; GFX9-NEXT:    s_add_i32 s24, s24, s23
 ; GFX9-NEXT:    s_mul_i32 s23, s6, s9
 ; GFX9-NEXT:    s_mul_i32 s7, s7, s8
@@ -1652,7 +1545,7 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) {
 ; GFX9-NEXT:    s_add_i32 s7, s7, s27
 ; GFX9-NEXT:    s_mul_i32 s29, s1, s14
 ; GFX9-NEXT:    s_add_i32 s7, s7, s28
-; GFX9-NEXT:    s_mul_i32 s15, s16, s15
+; GFX9-NEXT:    s_mul_i32 s15, s0, s15
 ; GFX9-NEXT:    s_add_i32 s7, s7, s29
 ; GFX9-NEXT:    s_mul_hi_u32 s6, s6, s8
 ; GFX9-NEXT:    s_add_i32 s7, s7, s15
@@ -1666,11 +1559,12 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) {
 ; GFX9-NEXT:    s_mul_hi_u32 s2, s2, s12
 ; GFX9-NEXT:    s_add_i32 s2, s3, s2
 ; GFX9-NEXT:    s_mul_hi_u32 s1, s1, s13
-; GFX9-NEXT:    s_mul_i32 s0, s0, s8
+; GFX9-NEXT:    s_mul_i32 s16, s0, s8
 ; GFX9-NEXT:    s_add_i32 s1, s2, s1
-; GFX9-NEXT:    s_mul_hi_u32 s8, s16, s14
-; GFX9-NEXT:    s_add_i32 s1, s1, s8
-; GFX9-NEXT:    s_add_i32 s7, s1, s24
+; GFX9-NEXT:    s_mul_hi_u32 s0, s0, s14
+; GFX9-NEXT:    s_add_i32 s0, s1, s0
+; GFX9-NEXT:    s_add_i32 s7, s0, s24
+; GFX9-NEXT:    s_mov_b32 s0, s16
 ; GFX9-NEXT:    s_mov_b32 s1, s17
 ; GFX9-NEXT:    s_mov_b32 s2, s18
 ; GFX9-NEXT:    s_mov_b32 s3, s19

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir
index 28d5b16cc91b..5c2bc3e93112 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir
@@ -22,10 +22,12 @@ legalized: true
 
 body: |
   bb.0:
-    liveins: $vgpr0_vgpr1
+    liveins: $vgpr0
     ; CHECK-LABEL: name: anyext_s32_to_s64_v
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s64) = G_ANYEXT [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[DEF]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s64) = G_ANYEXT %0
 ...
@@ -98,7 +100,10 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s16) = G_ANYEXT [[ICMP]](s1)
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[SELECT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -116,7 +121,9 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[ICMP]](s1)
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -134,7 +141,11 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-    ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s64) = G_ANYEXT [[ICMP]](s1)
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[COPY2]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -231,7 +242,9 @@ body: |
     ; CHECK-LABEL: name: anyext_s1_to_s64_vgpr
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s64) = G_ANYEXT [[TRUNC]](s1)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1)
+    ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[ANYEXT]](s32), [[DEF]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s1) = G_TRUNC %0
     %2:_(s64) = G_ANYEXT %1

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
index 0f37d7710280..fec347169d0c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
@@ -61,10 +61,7 @@ body: |
     ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
     ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
     ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s16) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15
-    ; CHECK: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s16) = G_SEXT [[TRUNC]](s1)
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -83,10 +80,7 @@ body: |
     ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
     ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
     ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31
-    ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s1)
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -105,10 +99,7 @@ body: |
     ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
     ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
     ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s64) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
-    ; CHECK: [[SHL:%[0-9]+]]:sgpr(s64) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[ASHR:%[0-9]+]]:sgpr(s64) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s64) = G_SEXT [[TRUNC]](s1)
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -188,10 +179,7 @@ body: |
     ; CHECK-LABEL: name: sext_s1_to_s16_sgpr
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
     ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s16) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15
-    ; CHECK: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s16) = G_SEXT [[TRUNC]](s1)
     %0:_(s32) = COPY $sgpr0
     %1:_(s1) = G_TRUNC %0
     %2:_(s16) = G_SEXT %1
@@ -207,10 +195,7 @@ body: |
     ; CHECK-LABEL: name: sext_s1_to_s32_sgpr
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
     ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31
-    ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s1)
     %0:_(s32) = COPY $sgpr0
     %1:_(s1) = G_TRUNC %0
     %2:_(s32) = G_SEXT %1
@@ -226,10 +211,7 @@ body: |
     ; CHECK-LABEL: name: sext_s1_to_s64_sgpr
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
     ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s64) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
-    ; CHECK: [[SHL:%[0-9]+]]:sgpr(s64) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[ASHR:%[0-9]+]]:sgpr(s64) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s64) = G_SEXT [[TRUNC]](s1)
     %0:_(s32) = COPY $sgpr0
     %1:_(s1) = G_TRUNC %0
     %2:_(s64) = G_SEXT %1
@@ -245,10 +227,7 @@ body: |
     ; CHECK-LABEL: name: sext_s1_to_s16_vgpr
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s16) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 15
-    ; CHECK: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: [[SEXT:%[0-9]+]]:vgpr(s16) = G_SEXT [[TRUNC]](s1)
     %0:_(s32) = COPY $vgpr0
     %1:_(s1) = G_TRUNC %0
     %2:_(s16) = G_SEXT %1
@@ -264,10 +243,7 @@ body: |
     ; CHECK-LABEL: name: sext_s1_to_s32_vgpr
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
-    ; CHECK: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: [[SEXT:%[0-9]+]]:vgpr(s32) = G_SEXT [[TRUNC]](s1)
     %0:_(s32) = COPY $vgpr0
     %1:_(s1) = G_TRUNC %0
     %2:_(s32) = G_SEXT %1

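The sext churn is the flip side of the same change: RegBankSelect no longer pre-expands an SGPR s1 sign extension into the G_ANYEXT/G_SHL/G_ASHR triple, and a plain G_SEXT now survives for selection to resolve. For reference, a hedged sketch of the shift pair the deleted CHECK lines encoded, where the shift amount is the destination width minus one (15/31/63 for s16/s32/s64); B, SrcReg, DstReg, and DstTy are assumed names:

    // sext(x) == ashr(shl(anyext(x), N - 1), N - 1) for an sN destination.
    auto Ext = B.buildAnyExt(DstTy, SrcReg);
    auto Amt = B.buildConstant(LLT::scalar(32), DstTy.getSizeInBits() - 1);
    auto Shl = B.buildShl(DstTy, Ext, Amt);
    B.buildAShr(DstReg, Shl, Amt);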
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
index ee22c54205d7..ef83a4c6c529 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
@@ -60,10 +60,7 @@ body: |
     ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
     ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
     ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s16) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15
-    ; CHECK: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[SHL]], [[C]](s32)
+    ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s16) = G_ZEXT [[TRUNC]](s1)
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -82,10 +79,7 @@ body: |
     ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
     ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
     ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31
-    ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[SHL]], [[C]](s32)
+    ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1)
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -104,10 +98,7 @@ body: |
     ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
     ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
     ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s64) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
-    ; CHECK: [[SHL:%[0-9]+]]:sgpr(s64) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[LSHR:%[0-9]+]]:sgpr(s64) = G_LSHR [[SHL]], [[C]](s32)
+    ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s64) = G_ZEXT [[TRUNC]](s1)
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -187,10 +178,7 @@ body: |
     ; CHECK-LABEL: name: zext_s1_to_s16_sgpr
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
     ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s16) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15
-    ; CHECK: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[SHL]], [[C]](s32)
+    ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s16) = G_ZEXT [[TRUNC]](s1)
     %0:_(s32) = COPY $sgpr0
     %1:_(s1) = G_TRUNC %0
     %2:_(s16) = G_ZEXT %1
@@ -206,10 +194,7 @@ body: |
     ; CHECK-LABEL: name: zext_s1_to_s32_sgpr
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
     ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31
-    ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[SHL]], [[C]](s32)
+    ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1)
     %0:_(s32) = COPY $sgpr0
     %1:_(s1) = G_TRUNC %0
     %2:_(s32) = G_ZEXT %1
@@ -225,10 +210,7 @@ body: |
     ; CHECK-LABEL: name: zext_s1_to_s64_sgpr
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
     ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s64) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
-    ; CHECK: [[SHL:%[0-9]+]]:sgpr(s64) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[LSHR:%[0-9]+]]:sgpr(s64) = G_LSHR [[SHL]], [[C]](s32)
+    ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s64) = G_ZEXT [[TRUNC]](s1)
     %0:_(s32) = COPY $sgpr0
     %1:_(s1) = G_TRUNC %0
     %2:_(s64) = G_ZEXT %1
@@ -244,10 +226,7 @@ body: |
     ; CHECK-LABEL: name: zext_s1_to_s16_vgpr
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s16) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 15
-    ; CHECK: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[SHL]], [[C]](s32)
+    ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s16) = G_ZEXT [[TRUNC]](s1)
     %0:_(s32) = COPY $vgpr0
     %1:_(s1) = G_TRUNC %0
     %2:_(s16) = G_ZEXT %1
@@ -263,10 +242,7 @@ body: |
     ; CHECK-LABEL: name: zext_s1_to_s32_vgpr
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
-    ; CHECK: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[LSHR:%[0-9]+]]:vgpr(s32) = G_LSHR [[SHL]], [[C]](s32)
+    ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s1)
     %0:_(s32) = COPY $vgpr0
     %1:_(s1) = G_TRUNC %0
     %2:_(s32) = G_ZEXT %1

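The zext updates mirror the sext ones: the deleted expansion was the same shift pair with a logical rather than arithmetic right shift, i.e. the sketch above with buildLShr in place of buildAShr, and a bare G_ZEXT is now what reaches instruction selection.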
More information about the llvm-commits mailing list