[llvm] 80b627d - AMDGPU/GlobalISel: Fix handling of G_ANYEXT with s1 source
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 16 10:02:34 PDT 2020
Author: Matt Arsenault
Date: 2020-03-16T12:59:54-04:00
New Revision: 80b627d69d3457e9b5deac2ba10808f00c96edf6
URL: https://github.com/llvm/llvm-project/commit/80b627d69d3457e9b5deac2ba10808f00c96edf6
DIFF: https://github.com/llvm/llvm-project/commit/80b627d69d3457e9b5deac2ba10808f00c96edf6.diff
LOG: AMDGPU/GlobalISel: Fix handling of G_ANYEXT with s1 source
We were letting G_ANYEXT with a vcc register bank through, which was
incorrect and would select to an invalid copy. Fix this up like G_ZEXT
and G_SEXT. Also drop the old code to fix up the non-boolean case in
RegBankSelect. We now have to perform that expansion during selection,
so there's no benefit to doing it during RegBankSelect.
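
To illustrate the new expansion, here is a minimal MIR sketch (schematic,
modeled on the updated regbankselect-anyext.mir checks below; not a complete
test file). A G_ANYEXT whose s1 source is assigned the vcc bank is now
rewritten during RegBankSelect into a G_SELECT between 1 and 0, instead of
being left as an anyext that would select to an invalid vcc-to-vgpr copy:

  %2:vcc(s1) = G_ICMP intpred(eq), %0(s32), %1
  %3:vgpr(s32) = G_CONSTANT i32 1
  %4:vgpr(s32) = G_CONSTANT i32 0
  %5:vgpr(s32) = G_SELECT %2(s1), %3, %4

For a 64-bit destination, the low 32 bits come from the same expansion and
the high half is a G_IMPLICIT_DEF combined with G_MERGE_VALUES, mirroring
the existing G_ZEXT/G_SEXT handling.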
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index da0f6b08264c..e72dc8e20220 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -220,7 +220,7 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
unsigned Size) const {
// TODO: Should there be a UniformVGPRRegBank which can use readfirstlane?
if (Dst.getID() == AMDGPU::SGPRRegBankID &&
- isVectorRegisterBank(Src)) {
+ (isVectorRegisterBank(Src) || Src.getID() == AMDGPU::VCCRegBankID)) {
return std::numeric_limits<unsigned>::max();
}
@@ -238,9 +238,6 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
Src.getID() == AMDGPU::VCCRegBankID))
return std::numeric_limits<unsigned>::max();
- if (Src.getID() == AMDGPU::VCCRegBankID)
- return std::numeric_limits<unsigned>::max();
-
// There is no direct copy between AGPRs.
if (Dst.getID() == AMDGPU::AGPRRegBankID &&
Src.getID() == AMDGPU::AGPRRegBankID)
@@ -2252,10 +2249,13 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
return;
}
case AMDGPU::G_SEXT:
- case AMDGPU::G_ZEXT: {
+ case AMDGPU::G_ZEXT:
+ case AMDGPU::G_ANYEXT: {
Register SrcReg = MI.getOperand(1).getReg();
LLT SrcTy = MRI.getType(SrcReg);
- bool Signed = Opc == AMDGPU::G_SEXT;
+ const bool Signed = Opc == AMDGPU::G_SEXT;
+
+ assert(empty(OpdMapper.getVRegs(1)));
MachineIRBuilder B(MI);
const RegisterBank *SrcBank =
@@ -2282,9 +2282,12 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
auto ShiftAmt = B.buildConstant(S32, 31);
MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
B.buildAShr(DefRegs[1], DefRegs[0], ShiftAmt);
- } else {
+ } else if (Opc == AMDGPU::G_ZEXT) {
B.buildZExtOrTrunc(DefRegs[0], SrcReg);
B.buildConstant(DefRegs[1], 0);
+ } else {
+ B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);
+ B.buildUndef(DefRegs[1]);
}
MRI.setRegBank(DstReg, *SrcBank);
@@ -2295,6 +2298,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
if (SrcTy != LLT::scalar(1))
return;
+ // It is not legal to have a legalization artifact with a VCC source. Rather
+ // than introducing a copy, insert the select that the copy would have to be
+ // selected to.
if (SrcBank == &AMDGPU::VCCRegBank) {
SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
@@ -2329,24 +2335,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
return;
}
- // Fixup the case with an s1 src that isn't a condition register. Use shifts
- // instead of introducing a compare to avoid an unnecessary condition
- // register (and since there's no scalar 16-bit compares).
- auto Ext = B.buildAnyExt(DstTy, SrcReg);
- auto ShiftAmt = B.buildConstant(LLT::scalar(32), DstTy.getSizeInBits() - 1);
- auto Shl = B.buildShl(DstTy, Ext, ShiftAmt);
-
- if (MI.getOpcode() == AMDGPU::G_SEXT)
- B.buildAShr(DstReg, Shl, ShiftAmt);
- else
- B.buildLShr(DstReg, Shl, ShiftAmt);
-
- MRI.setRegBank(DstReg, *SrcBank);
- MRI.setRegBank(Ext.getReg(0), *SrcBank);
- MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
- MRI.setRegBank(Shl.getReg(0), *SrcBank);
- MI.eraseFromParent();
- return;
+ break;
}
case AMDGPU::G_BUILD_VECTOR:
case AMDGPU::G_BUILD_VECTOR_TRUNC: {
@@ -3423,17 +3412,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
- // TODO: Should anyext be split into 32-bit part as well?
- if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
- OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
- OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
- } else {
- // Scalar extend can use 64-bit BFE, but VGPRs require extending to
- // 32-bits, and then to 64.
- OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
- OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
- SrcSize);
- }
+ // Scalar extend can use 64-bit BFE, but VGPRs require extending to
+ // 32-bits, and then to 64.
+ OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
+ OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
+ SrcSize);
break;
}
case AMDGPU::G_FCMP: {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
index 070bfaf8ff03..f34c481824af 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
@@ -301,11 +301,10 @@ define amdgpu_ps <3 x i32> @s_mul_i96(i96 inreg %num, i96 inreg %den) {
; GFX7-NEXT: s_mul_i32 s6, s0, s3
; GFX7-NEXT: s_mul_i32 s5, s0, s5
; GFX7-NEXT: s_add_i32 s0, s2, s7
-; GFX7-NEXT: s_lshl_b32 s8, s8, 31
; GFX7-NEXT: s_add_i32 s0, s0, s5
; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX7-NEXT: v_add_i32_e32 v2, vcc, s0, v2
-; GFX7-NEXT: s_lshr_b32 s8, s8, 31
+; GFX7-NEXT: s_and_b32 s8, s8, 1
; GFX7-NEXT: v_add_i32_e32 v1, vcc, s8, v1
; GFX7-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v1
@@ -332,11 +331,10 @@ define amdgpu_ps <3 x i32> @s_mul_i96(i96 inreg %num, i96 inreg %den) {
; GFX8-NEXT: s_mul_i32 s6, s0, s3
; GFX8-NEXT: s_mul_i32 s5, s0, s5
; GFX8-NEXT: s_add_i32 s0, s2, s7
-; GFX8-NEXT: s_lshl_b32 s8, s8, 31
; GFX8-NEXT: s_add_i32 s0, s0, s5
; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v2, vcc, s0, v2
-; GFX8-NEXT: s_lshr_b32 s8, s8, 31
+; GFX8-NEXT: s_and_b32 s8, s8, 1
; GFX8-NEXT: v_add_u32_e32 v1, vcc, s8, v1
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3
; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1
@@ -351,13 +349,11 @@ define amdgpu_ps <3 x i32> @s_mul_i96(i96 inreg %num, i96 inreg %den) {
; GFX9-NEXT: s_mul_i32 s8, s0, s4
; GFX9-NEXT: s_add_u32 s7, s7, s8
; GFX9-NEXT: s_cselect_b32 s8, 1, 0
-; GFX9-NEXT: s_lshl_b32 s8, s8, 31
; GFX9-NEXT: s_mul_hi_u32 s9, s0, s3
-; GFX9-NEXT: s_lshr_b32 s8, s8, 31
+; GFX9-NEXT: s_and_b32 s8, s8, 1
; GFX9-NEXT: s_add_u32 s7, s7, s9
; GFX9-NEXT: s_cselect_b32 s9, 1, 0
-; GFX9-NEXT: s_lshl_b32 s9, s9, 31
-; GFX9-NEXT: s_lshr_b32 s9, s9, 31
+; GFX9-NEXT: s_and_b32 s9, s9, 1
; GFX9-NEXT: s_add_i32 s8, s8, s9
; GFX9-NEXT: s_mul_i32 s9, s1, s4
; GFX9-NEXT: s_mul_i32 s2, s2, s3
@@ -467,27 +463,24 @@ define amdgpu_ps <4 x i32> @s_mul_i128(i128 inreg %num, i128 inreg %den) {
; GFX7-NEXT: s_mul_i32 s10, s0, s5
; GFX7-NEXT: s_add_u32 s9, s9, s10
; GFX7-NEXT: s_cselect_b32 s10, 1, 0
-; GFX7-NEXT: s_lshl_b32 s10, s10, 31
; GFX7-NEXT: v_add_i32_e32 v0, vcc, s9, v0
-; GFX7-NEXT: s_lshr_b32 s10, s10, 31
+; GFX7-NEXT: s_and_b32 s10, s10, 1
; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX7-NEXT: v_add_i32_e32 v1, vcc, s10, v1
; GFX7-NEXT: s_mul_i32 s9, s2, s4
; GFX7-NEXT: s_mul_i32 s10, s1, s5
+; GFX7-NEXT: v_mov_b32_e32 v2, s1
; GFX7-NEXT: s_add_u32 s9, s9, s10
; GFX7-NEXT: s_cselect_b32 s10, 1, 0
-; GFX7-NEXT: v_mov_b32_e32 v2, s1
-; GFX7-NEXT: s_lshl_b32 s10, s10, 31
-; GFX7-NEXT: s_mul_i32 s11, s0, s6
-; GFX7-NEXT: s_lshr_b32 s10, s10, 31
; GFX7-NEXT: v_mul_hi_u32 v2, v2, s4
+; GFX7-NEXT: s_mul_i32 s11, s0, s6
+; GFX7-NEXT: s_and_b32 s10, s10, 1
; GFX7-NEXT: s_add_u32 s9, s9, s11
-; GFX7-NEXT: s_cselect_b32 s11, 1, 0
; GFX7-NEXT: v_mov_b32_e32 v3, s5
-; GFX7-NEXT: s_lshl_b32 s11, s11, 31
+; GFX7-NEXT: s_cselect_b32 s11, 1, 0
; GFX7-NEXT: v_mul_hi_u32 v4, s0, v3
; GFX7-NEXT: v_add_i32_e32 v2, vcc, s9, v2
-; GFX7-NEXT: s_lshr_b32 s11, s11, 31
+; GFX7-NEXT: s_and_b32 s11, s11, 1
; GFX7-NEXT: s_add_i32 s10, s10, s11
; GFX7-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GFX7-NEXT: v_add_i32_e32 v5, vcc, s10, v5
@@ -528,27 +521,24 @@ define amdgpu_ps <4 x i32> @s_mul_i128(i128 inreg %num, i128 inreg %den) {
; GFX8-NEXT: s_mul_i32 s10, s0, s5
; GFX8-NEXT: s_add_u32 s9, s9, s10
; GFX8-NEXT: s_cselect_b32 s10, 1, 0
-; GFX8-NEXT: s_lshl_b32 s10, s10, 31
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s9, v0
-; GFX8-NEXT: s_lshr_b32 s10, s10, 31
+; GFX8-NEXT: s_and_b32 s10, s10, 1
; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v1, vcc, s10, v1
; GFX8-NEXT: s_mul_i32 s9, s2, s4
; GFX8-NEXT: s_mul_i32 s10, s1, s5
+; GFX8-NEXT: v_mov_b32_e32 v2, s1
; GFX8-NEXT: s_add_u32 s9, s9, s10
; GFX8-NEXT: s_cselect_b32 s10, 1, 0
-; GFX8-NEXT: v_mov_b32_e32 v2, s1
-; GFX8-NEXT: s_lshl_b32 s10, s10, 31
-; GFX8-NEXT: s_mul_i32 s11, s0, s6
-; GFX8-NEXT: s_lshr_b32 s10, s10, 31
; GFX8-NEXT: v_mul_hi_u32 v2, v2, s4
+; GFX8-NEXT: s_mul_i32 s11, s0, s6
+; GFX8-NEXT: s_and_b32 s10, s10, 1
; GFX8-NEXT: s_add_u32 s9, s9, s11
-; GFX8-NEXT: s_cselect_b32 s11, 1, 0
; GFX8-NEXT: v_mov_b32_e32 v3, s5
-; GFX8-NEXT: s_lshl_b32 s11, s11, 31
+; GFX8-NEXT: s_cselect_b32 s11, 1, 0
; GFX8-NEXT: v_mul_hi_u32 v4, s0, v3
; GFX8-NEXT: v_add_u32_e32 v2, vcc, s9, v2
-; GFX8-NEXT: s_lshr_b32 s11, s11, 31
+; GFX8-NEXT: s_and_b32 s11, s11, 1
; GFX8-NEXT: s_add_i32 s10, s10, s11
; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v5, vcc, s10, v5
@@ -587,42 +577,35 @@ define amdgpu_ps <4 x i32> @s_mul_i128(i128 inreg %num, i128 inreg %den) {
; GFX9-NEXT: s_mul_i32 s10, s0, s5
; GFX9-NEXT: s_add_u32 s9, s9, s10
; GFX9-NEXT: s_cselect_b32 s10, 1, 0
-; GFX9-NEXT: s_lshl_b32 s10, s10, 31
; GFX9-NEXT: s_mul_hi_u32 s11, s0, s4
-; GFX9-NEXT: s_lshr_b32 s10, s10, 31
+; GFX9-NEXT: s_and_b32 s10, s10, 1
; GFX9-NEXT: s_add_u32 s9, s9, s11
; GFX9-NEXT: s_cselect_b32 s11, 1, 0
-; GFX9-NEXT: s_lshl_b32 s11, s11, 31
-; GFX9-NEXT: s_lshr_b32 s11, s11, 31
+; GFX9-NEXT: s_and_b32 s11, s11, 1
; GFX9-NEXT: s_add_i32 s10, s10, s11
; GFX9-NEXT: s_mul_i32 s11, s2, s4
; GFX9-NEXT: s_mul_i32 s12, s1, s5
; GFX9-NEXT: s_add_u32 s11, s11, s12
; GFX9-NEXT: s_cselect_b32 s12, 1, 0
-; GFX9-NEXT: s_lshl_b32 s12, s12, 31
; GFX9-NEXT: s_mul_i32 s13, s0, s6
-; GFX9-NEXT: s_lshr_b32 s12, s12, 31
+; GFX9-NEXT: s_and_b32 s12, s12, 1
; GFX9-NEXT: s_add_u32 s11, s11, s13
; GFX9-NEXT: s_cselect_b32 s13, 1, 0
-; GFX9-NEXT: s_lshl_b32 s13, s13, 31
-; GFX9-NEXT: s_lshr_b32 s13, s13, 31
+; GFX9-NEXT: s_and_b32 s13, s13, 1
; GFX9-NEXT: s_mul_hi_u32 s14, s1, s4
; GFX9-NEXT: s_add_i32 s12, s12, s13
; GFX9-NEXT: s_add_u32 s11, s11, s14
; GFX9-NEXT: s_cselect_b32 s13, 1, 0
-; GFX9-NEXT: s_lshl_b32 s13, s13, 31
-; GFX9-NEXT: s_lshr_b32 s13, s13, 31
+; GFX9-NEXT: s_and_b32 s13, s13, 1
; GFX9-NEXT: s_mul_hi_u32 s15, s0, s5
; GFX9-NEXT: s_add_i32 s12, s12, s13
; GFX9-NEXT: s_add_u32 s11, s11, s15
; GFX9-NEXT: s_cselect_b32 s13, 1, 0
-; GFX9-NEXT: s_lshl_b32 s13, s13, 31
-; GFX9-NEXT: s_lshr_b32 s13, s13, 31
+; GFX9-NEXT: s_and_b32 s13, s13, 1
; GFX9-NEXT: s_add_i32 s12, s12, s13
; GFX9-NEXT: s_add_u32 s10, s11, s10
; GFX9-NEXT: s_cselect_b32 s11, 1, 0
-; GFX9-NEXT: s_lshl_b32 s11, s11, 31
-; GFX9-NEXT: s_lshr_b32 s11, s11, 31
+; GFX9-NEXT: s_and_b32 s11, s11, 1
; GFX9-NEXT: s_add_i32 s12, s12, s11
; GFX9-NEXT: s_mul_i32 s11, s2, s5
; GFX9-NEXT: s_mul_i32 s3, s3, s4
@@ -806,148 +789,134 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) {
; GFX7-NEXT: s_mul_i32 s18, s0, s9
; GFX7-NEXT: s_add_u32 s17, s17, s18
; GFX7-NEXT: s_cselect_b32 s18, 1, 0
-; GFX7-NEXT: s_lshl_b32 s18, s18, 31
; GFX7-NEXT: v_add_i32_e32 v0, vcc, s17, v0
-; GFX7-NEXT: s_lshr_b32 s18, s18, 31
+; GFX7-NEXT: s_and_b32 s18, s18, 1
; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX7-NEXT: v_add_i32_e32 v1, vcc, s18, v1
; GFX7-NEXT: s_mul_i32 s17, s2, s8
; GFX7-NEXT: s_mul_i32 s18, s1, s9
+; GFX7-NEXT: v_mov_b32_e32 v2, s1
; GFX7-NEXT: s_add_u32 s17, s17, s18
; GFX7-NEXT: s_cselect_b32 s18, 1, 0
-; GFX7-NEXT: v_mov_b32_e32 v2, s1
-; GFX7-NEXT: s_lshl_b32 s18, s18, 31
-; GFX7-NEXT: s_mul_i32 s19, s0, s10
-; GFX7-NEXT: s_lshr_b32 s18, s18, 31
; GFX7-NEXT: v_mul_hi_u32 v2, v2, s8
+; GFX7-NEXT: s_mul_i32 s19, s0, s10
+; GFX7-NEXT: s_and_b32 s18, s18, 1
; GFX7-NEXT: s_add_u32 s17, s17, s19
-; GFX7-NEXT: s_cselect_b32 s19, 1, 0
-; GFX7-NEXT: s_lshl_b32 s19, s19, 31
-; GFX7-NEXT: v_add_i32_e32 v2, vcc, s17, v2
; GFX7-NEXT: v_mov_b32_e32 v3, s9
-; GFX7-NEXT: s_lshr_b32 s19, s19, 31
+; GFX7-NEXT: s_cselect_b32 s19, 1, 0
; GFX7-NEXT: v_mul_hi_u32 v4, s0, v3
+; GFX7-NEXT: v_add_i32_e32 v2, vcc, s17, v2
+; GFX7-NEXT: s_and_b32 s19, s19, 1
; GFX7-NEXT: s_add_i32 s18, s18, s19
; GFX7-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GFX7-NEXT: v_add_i32_e32 v5, vcc, s18, v5
+; GFX7-NEXT: v_add_i32_e32 v2, vcc, v2, v4
; GFX7-NEXT: s_mul_i32 s17, s3, s8
; GFX7-NEXT: s_mul_i32 s18, s2, s9
+; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX7-NEXT: s_add_u32 s17, s17, s18
; GFX7-NEXT: s_cselect_b32 s18, 1, 0
-; GFX7-NEXT: v_add_i32_e32 v2, vcc, v2, v4
-; GFX7-NEXT: s_lshl_b32 s18, s18, 31
-; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX7-NEXT: s_mul_i32 s19, s1, s10
-; GFX7-NEXT: s_lshr_b32 s18, s18, 31
-; GFX7-NEXT: s_add_u32 s17, s17, s19
; GFX7-NEXT: v_add_i32_e32 v4, vcc, v5, v4
-; GFX7-NEXT: s_cselect_b32 s19, 1, 0
; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v1
+; GFX7-NEXT: s_mul_i32 s19, s1, s10
+; GFX7-NEXT: s_and_b32 s18, s18, 1
; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX7-NEXT: s_lshl_b32 s19, s19, 31
+; GFX7-NEXT: s_add_u32 s17, s17, s19
+; GFX7-NEXT: s_cselect_b32 s19, 1, 0
; GFX7-NEXT: v_add_i32_e32 v2, vcc, v4, v2
; GFX7-NEXT: v_mov_b32_e32 v4, s2
-; GFX7-NEXT: s_lshr_b32 s19, s19, 31
; GFX7-NEXT: v_mul_hi_u32 v5, v4, s8
+; GFX7-NEXT: s_and_b32 s19, s19, 1
; GFX7-NEXT: s_mul_i32 s20, s0, s11
; GFX7-NEXT: s_add_i32 s18, s18, s19
; GFX7-NEXT: s_add_u32 s17, s17, s20
; GFX7-NEXT: s_cselect_b32 s19, 1, 0
-; GFX7-NEXT: s_lshl_b32 s19, s19, 31
+; GFX7-NEXT: v_mul_hi_u32 v3, s1, v3
; GFX7-NEXT: v_add_i32_e32 v5, vcc, s17, v5
-; GFX7-NEXT: s_lshr_b32 s19, s19, 31
+; GFX7-NEXT: s_and_b32 s19, s19, 1
+; GFX7-NEXT: v_mov_b32_e32 v6, s10
; GFX7-NEXT: s_add_i32 s18, s18, s19
; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GFX7-NEXT: v_add_i32_e32 v8, vcc, s18, v8
+; GFX7-NEXT: v_mul_hi_u32 v7, s0, v6
; GFX7-NEXT: s_mul_i32 s17, s4, s8
; GFX7-NEXT: s_mul_i32 s18, s3, s9
-; GFX7-NEXT: v_mul_hi_u32 v3, s1, v3
+; GFX7-NEXT: v_add_i32_e32 v3, vcc, v5, v3
; GFX7-NEXT: s_add_u32 s17, s17, s18
+; GFX7-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GFX7-NEXT: s_cselect_b32 s18, 1, 0
-; GFX7-NEXT: s_lshl_b32 s18, s18, 31
-; GFX7-NEXT: v_mov_b32_e32 v6, s10
-; GFX7-NEXT: v_mul_hi_u32 v7, s0, v6
+; GFX7-NEXT: v_add_i32_e32 v5, vcc, v8, v5
; GFX7-NEXT: s_mul_i32 s19, s2, s10
-; GFX7-NEXT: s_lshr_b32 s18, s18, 31
-; GFX7-NEXT: v_add_i32_e32 v3, vcc, v5, v3
+; GFX7-NEXT: s_and_b32 s18, s18, 1
+; GFX7-NEXT: v_add_i32_e32 v3, vcc, v3, v7
; GFX7-NEXT: s_add_u32 s17, s17, s19
-; GFX7-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GFX7-NEXT: s_cselect_b32 s19, 1, 0
-; GFX7-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; GFX7-NEXT: s_lshl_b32 s19, s19, 31
-; GFX7-NEXT: v_add_i32_e32 v3, vcc, v3, v7
-; GFX7-NEXT: s_lshr_b32 s19, s19, 31
; GFX7-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GFX7-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; GFX7-NEXT: s_and_b32 s19, s19, 1
+; GFX7-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; GFX7-NEXT: s_mul_i32 s20, s1, s11
; GFX7-NEXT: s_add_i32 s18, s18, s19
+; GFX7-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX7-NEXT: s_add_u32 s17, s17, s20
-; GFX7-NEXT: v_add_i32_e32 v5, vcc, v5, v7
; GFX7-NEXT: s_cselect_b32 s19, 1, 0
-; GFX7-NEXT: v_add_i32_e32 v2, vcc, v3, v2
-; GFX7-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX7-NEXT: s_lshl_b32 s19, s19, 31
; GFX7-NEXT: v_add_i32_e32 v3, vcc, v5, v3
; GFX7-NEXT: v_mov_b32_e32 v5, s3
-; GFX7-NEXT: s_lshr_b32 s19, s19, 31
+; GFX7-NEXT: s_and_b32 s19, s19, 1
+; GFX7-NEXT: v_mul_hi_u32 v7, v5, s8
; GFX7-NEXT: s_mul_i32 s21, s0, s12
; GFX7-NEXT: s_add_i32 s18, s18, s19
-; GFX7-NEXT: v_mul_hi_u32 v7, v5, s8
; GFX7-NEXT: s_add_u32 s17, s17, s21
; GFX7-NEXT: s_cselect_b32 s19, 1, 0
-; GFX7-NEXT: s_lshl_b32 s19, s19, 31
; GFX7-NEXT: v_add_i32_e32 v7, vcc, s17, v7
-; GFX7-NEXT: s_lshr_b32 s19, s19, 31
+; GFX7-NEXT: s_and_b32 s19, s19, 1
+; GFX7-NEXT: v_mul_hi_u32 v4, v4, s9
; GFX7-NEXT: s_add_i32 s18, s18, s19
; GFX7-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; GFX7-NEXT: v_add_i32_e32 v11, vcc, s18, v11
; GFX7-NEXT: s_mul_i32 s17, s5, s8
; GFX7-NEXT: s_mul_i32 s18, s4, s9
; GFX7-NEXT: s_add_u32 s17, s17, s18
-; GFX7-NEXT: s_cselect_b32 s18, 1, 0
-; GFX7-NEXT: v_mul_hi_u32 v4, v4, s9
-; GFX7-NEXT: s_lshl_b32 s18, s18, 31
-; GFX7-NEXT: s_mul_i32 s19, s3, s10
-; GFX7-NEXT: s_lshr_b32 s18, s18, 31
-; GFX7-NEXT: s_add_u32 s17, s17, s19
-; GFX7-NEXT: s_cselect_b32 s19, 1, 0
; GFX7-NEXT: v_mul_hi_u32 v8, s1, v6
+; GFX7-NEXT: s_cselect_b32 s18, 1, 0
; GFX7-NEXT: v_add_i32_e32 v4, vcc, v7, v4
-; GFX7-NEXT: s_lshl_b32 s19, s19, 31
; GFX7-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; GFX7-NEXT: v_mov_b32_e32 v9, s11
-; GFX7-NEXT: s_lshr_b32 s19, s19, 31
+; GFX7-NEXT: s_mul_i32 s19, s3, s10
+; GFX7-NEXT: s_and_b32 s18, s18, 1
; GFX7-NEXT: v_add_i32_e32 v7, vcc, v11, v7
+; GFX7-NEXT: s_add_u32 s17, s17, s19
; GFX7-NEXT: v_mul_hi_u32 v10, s0, v9
+; GFX7-NEXT: s_cselect_b32 s19, 1, 0
+; GFX7-NEXT: v_add_i32_e32 v4, vcc, v4, v8
+; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GFX7-NEXT: s_and_b32 s19, s19, 1
+; GFX7-NEXT: v_add_i32_e32 v7, vcc, v7, v8
; GFX7-NEXT: s_mul_i32 s20, s2, s11
; GFX7-NEXT: s_add_i32 s18, s18, s19
-; GFX7-NEXT: v_add_i32_e32 v4, vcc, v4, v8
+; GFX7-NEXT: v_add_i32_e32 v4, vcc, v4, v10
; GFX7-NEXT: s_add_u32 s17, s17, s20
; GFX7-NEXT: s_cselect_b32 s19, 1, 0
; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX7-NEXT: s_lshl_b32 s19, s19, 31
; GFX7-NEXT: v_add_i32_e32 v7, vcc, v7, v8
-; GFX7-NEXT: v_add_i32_e32 v4, vcc, v4, v10
-; GFX7-NEXT: s_lshr_b32 s19, s19, 31
-; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GFX7-NEXT: s_and_b32 s19, s19, 1
+; GFX7-NEXT: v_add_i32_e32 v3, vcc, v4, v3
; GFX7-NEXT: s_mul_i32 s21, s1, s12
; GFX7-NEXT: s_add_i32 s18, s18, s19
+; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX7-NEXT: s_add_u32 s17, s17, s21
-; GFX7-NEXT: v_add_i32_e32 v7, vcc, v7, v8
; GFX7-NEXT: s_cselect_b32 s19, 1, 0
-; GFX7-NEXT: v_add_i32_e32 v3, vcc, v4, v3
-; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX7-NEXT: s_lshl_b32 s19, s19, 31
; GFX7-NEXT: v_add_i32_e32 v4, vcc, v7, v4
; GFX7-NEXT: v_mov_b32_e32 v7, s4
-; GFX7-NEXT: s_lshr_b32 s19, s19, 31
+; GFX7-NEXT: s_and_b32 s19, s19, 1
+; GFX7-NEXT: v_mul_hi_u32 v8, v7, s8
; GFX7-NEXT: s_mul_i32 s22, s0, s13
; GFX7-NEXT: s_add_i32 s18, s18, s19
-; GFX7-NEXT: v_mul_hi_u32 v8, v7, s8
; GFX7-NEXT: s_add_u32 s17, s17, s22
; GFX7-NEXT: s_cselect_b32 s19, 1, 0
-; GFX7-NEXT: s_lshl_b32 s19, s19, 31
; GFX7-NEXT: v_add_i32_e32 v8, vcc, s17, v8
-; GFX7-NEXT: s_lshr_b32 s19, s19, 31
+; GFX7-NEXT: s_and_b32 s19, s19, 1
+; GFX7-NEXT: v_mul_hi_u32 v10, v5, s9
; GFX7-NEXT: s_add_i32 s18, s18, s19
; GFX7-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
; GFX7-NEXT: v_add_i32_e32 v14, vcc, s18, v14
@@ -955,61 +924,54 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) {
; GFX7-NEXT: s_mul_i32 s18, s5, s9
; GFX7-NEXT: s_add_u32 s17, s17, s18
; GFX7-NEXT: s_cselect_b32 s18, 1, 0
-; GFX7-NEXT: s_lshl_b32 s18, s18, 31
-; GFX7-NEXT: s_mul_i32 s19, s4, s10
-; GFX7-NEXT: s_lshr_b32 s18, s18, 31
-; GFX7-NEXT: v_mul_hi_u32 v10, v5, s9
-; GFX7-NEXT: s_add_u32 s17, s17, s19
-; GFX7-NEXT: s_cselect_b32 s19, 1, 0
-; GFX7-NEXT: s_lshl_b32 s19, s19, 31
-; GFX7-NEXT: s_lshr_b32 s19, s19, 31
; GFX7-NEXT: v_mul_hi_u32 v6, s2, v6
; GFX7-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; GFX7-NEXT: s_mul_i32 s20, s3, s11
-; GFX7-NEXT: s_add_i32 s18, s18, s19
+; GFX7-NEXT: s_mul_i32 s19, s4, s10
+; GFX7-NEXT: s_and_b32 s18, s18, 1
; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GFX7-NEXT: s_add_u32 s17, s17, s20
+; GFX7-NEXT: s_add_u32 s17, s17, s19
; GFX7-NEXT: s_cselect_b32 s19, 1, 0
; GFX7-NEXT: v_add_i32_e32 v10, vcc, v14, v10
; GFX7-NEXT: v_mul_hi_u32 v11, s1, v9
; GFX7-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; GFX7-NEXT: s_lshl_b32 s19, s19, 31
+; GFX7-NEXT: s_and_b32 s19, s19, 1
; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GFX7-NEXT: v_mov_b32_e32 v12, s12
-; GFX7-NEXT: s_lshr_b32 s19, s19, 31
+; GFX7-NEXT: s_mul_i32 s20, s3, s11
+; GFX7-NEXT: s_add_i32 s18, s18, s19
; GFX7-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; GFX7-NEXT: s_add_u32 s17, s17, s20
; GFX7-NEXT: v_mul_hi_u32 v13, s0, v12
+; GFX7-NEXT: s_cselect_b32 s19, 1, 0
+; GFX7-NEXT: v_add_i32_e32 v6, vcc, v6, v11
+; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GFX7-NEXT: s_and_b32 s19, s19, 1
+; GFX7-NEXT: v_add_i32_e32 v8, vcc, v8, v10
; GFX7-NEXT: s_mul_i32 s21, s2, s12
; GFX7-NEXT: s_add_i32 s18, s18, s19
-; GFX7-NEXT: v_add_i32_e32 v6, vcc, v6, v11
+; GFX7-NEXT: v_add_i32_e32 v6, vcc, v6, v13
; GFX7-NEXT: s_add_u32 s17, s17, s21
; GFX7-NEXT: s_cselect_b32 s19, 1, 0
; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GFX7-NEXT: s_lshl_b32 s19, s19, 31
; GFX7-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; GFX7-NEXT: v_add_i32_e32 v6, vcc, v6, v13
-; GFX7-NEXT: s_lshr_b32 s19, s19, 31
-; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GFX7-NEXT: s_and_b32 s19, s19, 1
+; GFX7-NEXT: v_add_i32_e32 v4, vcc, v6, v4
; GFX7-NEXT: s_mul_i32 s22, s1, s13
; GFX7-NEXT: s_add_i32 s18, s18, s19
-; GFX7-NEXT: v_add_i32_e32 v8, vcc, v8, v10
+; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GFX7-NEXT: s_add_u32 s17, s17, s22
; GFX7-NEXT: s_cselect_b32 s19, 1, 0
-; GFX7-NEXT: v_add_i32_e32 v4, vcc, v6, v4
-; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX7-NEXT: s_lshl_b32 s19, s19, 31
; GFX7-NEXT: v_add_i32_e32 v6, vcc, v8, v6
; GFX7-NEXT: v_mov_b32_e32 v8, s5
-; GFX7-NEXT: s_lshr_b32 s19, s19, 31
; GFX7-NEXT: v_mul_hi_u32 v10, v8, s8
+; GFX7-NEXT: s_and_b32 s19, s19, 1
; GFX7-NEXT: s_mul_i32 s23, s0, s14
; GFX7-NEXT: s_add_i32 s18, s18, s19
; GFX7-NEXT: s_add_u32 s17, s17, s23
; GFX7-NEXT: s_cselect_b32 s19, 1, 0
-; GFX7-NEXT: s_lshl_b32 s19, s19, 31
; GFX7-NEXT: v_mul_hi_u32 v11, v7, s9
; GFX7-NEXT: v_add_i32_e32 v10, vcc, s17, v10
-; GFX7-NEXT: s_lshr_b32 s19, s19, 31
+; GFX7-NEXT: s_and_b32 s19, s19, 1
; GFX7-NEXT: s_add_i32 s18, s18, s19
; GFX7-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
; GFX7-NEXT: v_add_i32_e32 v17, vcc, s18, v17
@@ -1087,148 +1049,134 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) {
; GFX8-NEXT: s_mul_i32 s18, s0, s9
; GFX8-NEXT: s_add_u32 s17, s17, s18
; GFX8-NEXT: s_cselect_b32 s18, 1, 0
-; GFX8-NEXT: s_lshl_b32 s18, s18, 31
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s17, v0
-; GFX8-NEXT: s_lshr_b32 s18, s18, 31
+; GFX8-NEXT: s_and_b32 s18, s18, 1
; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v1, vcc, s18, v1
; GFX8-NEXT: s_mul_i32 s17, s2, s8
; GFX8-NEXT: s_mul_i32 s18, s1, s9
+; GFX8-NEXT: v_mov_b32_e32 v2, s1
; GFX8-NEXT: s_add_u32 s17, s17, s18
; GFX8-NEXT: s_cselect_b32 s18, 1, 0
-; GFX8-NEXT: v_mov_b32_e32 v2, s1
-; GFX8-NEXT: s_lshl_b32 s18, s18, 31
-; GFX8-NEXT: s_mul_i32 s19, s0, s10
-; GFX8-NEXT: s_lshr_b32 s18, s18, 31
; GFX8-NEXT: v_mul_hi_u32 v2, v2, s8
+; GFX8-NEXT: s_mul_i32 s19, s0, s10
+; GFX8-NEXT: s_and_b32 s18, s18, 1
; GFX8-NEXT: s_add_u32 s17, s17, s19
-; GFX8-NEXT: s_cselect_b32 s19, 1, 0
-; GFX8-NEXT: s_lshl_b32 s19, s19, 31
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, s17, v2
; GFX8-NEXT: v_mov_b32_e32 v3, s9
-; GFX8-NEXT: s_lshr_b32 s19, s19, 31
+; GFX8-NEXT: s_cselect_b32 s19, 1, 0
; GFX8-NEXT: v_mul_hi_u32 v4, s0, v3
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, s17, v2
+; GFX8-NEXT: s_and_b32 s19, s19, 1
; GFX8-NEXT: s_add_i32 s18, s18, s19
; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v5, vcc, s18, v5
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4
; GFX8-NEXT: s_mul_i32 s17, s3, s8
; GFX8-NEXT: s_mul_i32 s18, s2, s9
+; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX8-NEXT: s_add_u32 s17, s17, s18
; GFX8-NEXT: s_cselect_b32 s18, 1, 0
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4
-; GFX8-NEXT: s_lshl_b32 s18, s18, 31
-; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX8-NEXT: s_mul_i32 s19, s1, s10
-; GFX8-NEXT: s_lshr_b32 s18, s18, 31
-; GFX8-NEXT: s_add_u32 s17, s17, s19
; GFX8-NEXT: v_add_u32_e32 v4, vcc, v5, v4
-; GFX8-NEXT: s_cselect_b32 s19, 1, 0
; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1
+; GFX8-NEXT: s_mul_i32 s19, s1, s10
+; GFX8-NEXT: s_and_b32 s18, s18, 1
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT: s_lshl_b32 s19, s19, 31
+; GFX8-NEXT: s_add_u32 s17, s17, s19
+; GFX8-NEXT: s_cselect_b32 s19, 1, 0
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v4, v2
; GFX8-NEXT: v_mov_b32_e32 v4, s2
-; GFX8-NEXT: s_lshr_b32 s19, s19, 31
; GFX8-NEXT: v_mul_hi_u32 v5, v4, s8
+; GFX8-NEXT: s_and_b32 s19, s19, 1
; GFX8-NEXT: s_mul_i32 s20, s0, s11
; GFX8-NEXT: s_add_i32 s18, s18, s19
; GFX8-NEXT: s_add_u32 s17, s17, s20
; GFX8-NEXT: s_cselect_b32 s19, 1, 0
-; GFX8-NEXT: s_lshl_b32 s19, s19, 31
+; GFX8-NEXT: v_mul_hi_u32 v3, s1, v3
; GFX8-NEXT: v_add_u32_e32 v5, vcc, s17, v5
-; GFX8-NEXT: s_lshr_b32 s19, s19, 31
+; GFX8-NEXT: s_and_b32 s19, s19, 1
+; GFX8-NEXT: v_mov_b32_e32 v6, s10
; GFX8-NEXT: s_add_i32 s18, s18, s19
; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v8, vcc, s18, v8
+; GFX8-NEXT: v_mul_hi_u32 v7, s0, v6
; GFX8-NEXT: s_mul_i32 s17, s4, s8
; GFX8-NEXT: s_mul_i32 s18, s3, s9
-; GFX8-NEXT: v_mul_hi_u32 v3, s1, v3
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3
; GFX8-NEXT: s_add_u32 s17, s17, s18
+; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GFX8-NEXT: s_cselect_b32 s18, 1, 0
-; GFX8-NEXT: s_lshl_b32 s18, s18, 31
-; GFX8-NEXT: v_mov_b32_e32 v6, s10
-; GFX8-NEXT: v_mul_hi_u32 v7, s0, v6
+; GFX8-NEXT: v_add_u32_e32 v5, vcc, v8, v5
; GFX8-NEXT: s_mul_i32 s19, s2, s10
-; GFX8-NEXT: s_lshr_b32 s18, s18, 31
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3
+; GFX8-NEXT: s_and_b32 s18, s18, 1
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7
; GFX8-NEXT: s_add_u32 s17, s17, s19
-; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; GFX8-NEXT: s_cselect_b32 s19, 1, 0
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, v8, v5
-; GFX8-NEXT: s_lshl_b32 s19, s19, 31
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7
-; GFX8-NEXT: s_lshr_b32 s19, s19, 31
; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v7
+; GFX8-NEXT: s_and_b32 s19, s19, 1
+; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
; GFX8-NEXT: s_mul_i32 s20, s1, s11
; GFX8-NEXT: s_add_i32 s18, s18, s19
+; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX8-NEXT: s_add_u32 s17, s17, s20
-; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v7
; GFX8-NEXT: s_cselect_b32 s19, 1, 0
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT: s_lshl_b32 s19, s19, 31
; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3
; GFX8-NEXT: v_mov_b32_e32 v5, s3
-; GFX8-NEXT: s_lshr_b32 s19, s19, 31
+; GFX8-NEXT: s_and_b32 s19, s19, 1
+; GFX8-NEXT: v_mul_hi_u32 v7, v5, s8
; GFX8-NEXT: s_mul_i32 s21, s0, s12
; GFX8-NEXT: s_add_i32 s18, s18, s19
-; GFX8-NEXT: v_mul_hi_u32 v7, v5, s8
; GFX8-NEXT: s_add_u32 s17, s17, s21
; GFX8-NEXT: s_cselect_b32 s19, 1, 0
-; GFX8-NEXT: s_lshl_b32 s19, s19, 31
; GFX8-NEXT: v_add_u32_e32 v7, vcc, s17, v7
-; GFX8-NEXT: s_lshr_b32 s19, s19, 31
+; GFX8-NEXT: s_and_b32 s19, s19, 1
+; GFX8-NEXT: v_mul_hi_u32 v4, v4, s9
; GFX8-NEXT: s_add_i32 s18, s18, s19
; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v11, vcc, s18, v11
; GFX8-NEXT: s_mul_i32 s17, s5, s8
; GFX8-NEXT: s_mul_i32 s18, s4, s9
; GFX8-NEXT: s_add_u32 s17, s17, s18
-; GFX8-NEXT: s_cselect_b32 s18, 1, 0
-; GFX8-NEXT: v_mul_hi_u32 v4, v4, s9
-; GFX8-NEXT: s_lshl_b32 s18, s18, 31
-; GFX8-NEXT: s_mul_i32 s19, s3, s10
-; GFX8-NEXT: s_lshr_b32 s18, s18, 31
-; GFX8-NEXT: s_add_u32 s17, s17, s19
-; GFX8-NEXT: s_cselect_b32 s19, 1, 0
; GFX8-NEXT: v_mul_hi_u32 v8, s1, v6
+; GFX8-NEXT: s_cselect_b32 s18, 1, 0
; GFX8-NEXT: v_add_u32_e32 v4, vcc, v7, v4
-; GFX8-NEXT: s_lshl_b32 s19, s19, 31
; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; GFX8-NEXT: v_mov_b32_e32 v9, s11
-; GFX8-NEXT: s_lshr_b32 s19, s19, 31
+; GFX8-NEXT: s_mul_i32 s19, s3, s10
+; GFX8-NEXT: s_and_b32 s18, s18, 1
; GFX8-NEXT: v_add_u32_e32 v7, vcc, v11, v7
+; GFX8-NEXT: s_add_u32 s17, s17, s19
; GFX8-NEXT: v_mul_hi_u32 v10, s0, v9
+; GFX8-NEXT: s_cselect_b32 s19, 1, 0
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v8
+; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GFX8-NEXT: s_and_b32 s19, s19, 1
+; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8
; GFX8-NEXT: s_mul_i32 s20, s2, s11
; GFX8-NEXT: s_add_i32 s18, s18, s19
-; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v8
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v10
; GFX8-NEXT: s_add_u32 s17, s17, s20
; GFX8-NEXT: s_cselect_b32 s19, 1, 0
; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GFX8-NEXT: s_lshl_b32 s19, s19, 31
; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8
-; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v10
-; GFX8-NEXT: s_lshr_b32 s19, s19, 31
-; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GFX8-NEXT: s_and_b32 s19, s19, 1
+; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3
; GFX8-NEXT: s_mul_i32 s21, s1, s12
; GFX8-NEXT: s_add_i32 s18, s18, s19
+; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX8-NEXT: s_add_u32 s17, s17, s21
-; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8
; GFX8-NEXT: s_cselect_b32 s19, 1, 0
-; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3
-; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; GFX8-NEXT: s_lshl_b32 s19, s19, 31
; GFX8-NEXT: v_add_u32_e32 v4, vcc, v7, v4
; GFX8-NEXT: v_mov_b32_e32 v7, s4
-; GFX8-NEXT: s_lshr_b32 s19, s19, 31
+; GFX8-NEXT: s_and_b32 s19, s19, 1
+; GFX8-NEXT: v_mul_hi_u32 v8, v7, s8
; GFX8-NEXT: s_mul_i32 s22, s0, s13
; GFX8-NEXT: s_add_i32 s18, s18, s19
-; GFX8-NEXT: v_mul_hi_u32 v8, v7, s8
; GFX8-NEXT: s_add_u32 s17, s17, s22
; GFX8-NEXT: s_cselect_b32 s19, 1, 0
-; GFX8-NEXT: s_lshl_b32 s19, s19, 31
; GFX8-NEXT: v_add_u32_e32 v8, vcc, s17, v8
-; GFX8-NEXT: s_lshr_b32 s19, s19, 31
+; GFX8-NEXT: s_and_b32 s19, s19, 1
+; GFX8-NEXT: v_mul_hi_u32 v10, v5, s9
; GFX8-NEXT: s_add_i32 s18, s18, s19
; GFX8-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v14, vcc, s18, v14
@@ -1236,61 +1184,54 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) {
; GFX8-NEXT: s_mul_i32 s18, s5, s9
; GFX8-NEXT: s_add_u32 s17, s17, s18
; GFX8-NEXT: s_cselect_b32 s18, 1, 0
-; GFX8-NEXT: s_lshl_b32 s18, s18, 31
-; GFX8-NEXT: s_mul_i32 s19, s4, s10
-; GFX8-NEXT: s_lshr_b32 s18, s18, 31
-; GFX8-NEXT: v_mul_hi_u32 v10, v5, s9
-; GFX8-NEXT: s_add_u32 s17, s17, s19
-; GFX8-NEXT: s_cselect_b32 s19, 1, 0
-; GFX8-NEXT: s_lshl_b32 s19, s19, 31
-; GFX8-NEXT: s_lshr_b32 s19, s19, 31
; GFX8-NEXT: v_mul_hi_u32 v6, s2, v6
; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v10
-; GFX8-NEXT: s_mul_i32 s20, s3, s11
-; GFX8-NEXT: s_add_i32 s18, s18, s19
+; GFX8-NEXT: s_mul_i32 s19, s4, s10
+; GFX8-NEXT: s_and_b32 s18, s18, 1
; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GFX8-NEXT: s_add_u32 s17, s17, s20
+; GFX8-NEXT: s_add_u32 s17, s17, s19
; GFX8-NEXT: s_cselect_b32 s19, 1, 0
; GFX8-NEXT: v_add_u32_e32 v10, vcc, v14, v10
; GFX8-NEXT: v_mul_hi_u32 v11, s1, v9
; GFX8-NEXT: v_add_u32_e32 v6, vcc, v8, v6
-; GFX8-NEXT: s_lshl_b32 s19, s19, 31
+; GFX8-NEXT: s_and_b32 s19, s19, 1
; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GFX8-NEXT: v_mov_b32_e32 v12, s12
-; GFX8-NEXT: s_lshr_b32 s19, s19, 31
+; GFX8-NEXT: s_mul_i32 s20, s3, s11
+; GFX8-NEXT: s_add_i32 s18, s18, s19
; GFX8-NEXT: v_add_u32_e32 v8, vcc, v10, v8
+; GFX8-NEXT: s_add_u32 s17, s17, s20
; GFX8-NEXT: v_mul_hi_u32 v13, s0, v12
+; GFX8-NEXT: s_cselect_b32 s19, 1, 0
+; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v11
+; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GFX8-NEXT: s_and_b32 s19, s19, 1
+; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v10
; GFX8-NEXT: s_mul_i32 s21, s2, s12
; GFX8-NEXT: s_add_i32 s18, s18, s19
-; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v11
+; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v13
; GFX8-NEXT: s_add_u32 s17, s17, s21
; GFX8-NEXT: s_cselect_b32 s19, 1, 0
; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GFX8-NEXT: s_lshl_b32 s19, s19, 31
; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v10
-; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v13
-; GFX8-NEXT: s_lshr_b32 s19, s19, 31
-; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; GFX8-NEXT: s_and_b32 s19, s19, 1
+; GFX8-NEXT: v_add_u32_e32 v4, vcc, v6, v4
; GFX8-NEXT: s_mul_i32 s22, s1, s13
; GFX8-NEXT: s_add_i32 s18, s18, s19
-; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v10
+; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GFX8-NEXT: s_add_u32 s17, s17, s22
; GFX8-NEXT: s_cselect_b32 s19, 1, 0
-; GFX8-NEXT: v_add_u32_e32 v4, vcc, v6, v4
-; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT: s_lshl_b32 s19, s19, 31
; GFX8-NEXT: v_add_u32_e32 v6, vcc, v8, v6
; GFX8-NEXT: v_mov_b32_e32 v8, s5
-; GFX8-NEXT: s_lshr_b32 s19, s19, 31
; GFX8-NEXT: v_mul_hi_u32 v10, v8, s8
+; GFX8-NEXT: s_and_b32 s19, s19, 1
; GFX8-NEXT: s_mul_i32 s23, s0, s14
; GFX8-NEXT: s_add_i32 s18, s18, s19
; GFX8-NEXT: s_add_u32 s17, s17, s23
; GFX8-NEXT: s_cselect_b32 s19, 1, 0
-; GFX8-NEXT: s_lshl_b32 s19, s19, 31
; GFX8-NEXT: v_mul_hi_u32 v11, v7, s9
; GFX8-NEXT: v_add_u32_e32 v10, vcc, s17, v10
-; GFX8-NEXT: s_lshr_b32 s19, s19, 31
+; GFX8-NEXT: s_and_b32 s19, s19, 1
; GFX8-NEXT: s_add_i32 s18, s18, s19
; GFX8-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
; GFX8-NEXT: v_add_u32_e32 v17, vcc, s18, v17
@@ -1362,283 +1303,235 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) {
;
; GFX9-LABEL: s_mul_i256:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s16, s0
; GFX9-NEXT: s_mul_i32 s17, s1, s8
-; GFX9-NEXT: s_mul_i32 s18, s16, s9
+; GFX9-NEXT: s_mul_i32 s18, s0, s9
; GFX9-NEXT: s_add_u32 s17, s17, s18
; GFX9-NEXT: s_cselect_b32 s18, 1, 0
-; GFX9-NEXT: s_lshl_b32 s18, s18, 31
-; GFX9-NEXT: s_mul_hi_u32 s19, s16, s8
-; GFX9-NEXT: s_lshr_b32 s18, s18, 31
+; GFX9-NEXT: s_mul_hi_u32 s19, s0, s8
+; GFX9-NEXT: s_and_b32 s18, s18, 1
; GFX9-NEXT: s_add_u32 s17, s17, s19
; GFX9-NEXT: s_cselect_b32 s19, 1, 0
-; GFX9-NEXT: s_lshl_b32 s19, s19, 31
-; GFX9-NEXT: s_lshr_b32 s19, s19, 31
+; GFX9-NEXT: s_and_b32 s19, s19, 1
; GFX9-NEXT: s_add_i32 s18, s18, s19
; GFX9-NEXT: s_mul_i32 s19, s2, s8
; GFX9-NEXT: s_mul_i32 s20, s1, s9
; GFX9-NEXT: s_add_u32 s19, s19, s20
; GFX9-NEXT: s_cselect_b32 s20, 1, 0
-; GFX9-NEXT: s_lshl_b32 s20, s20, 31
-; GFX9-NEXT: s_mul_i32 s21, s16, s10
-; GFX9-NEXT: s_lshr_b32 s20, s20, 31
+; GFX9-NEXT: s_mul_i32 s21, s0, s10
+; GFX9-NEXT: s_and_b32 s20, s20, 1
; GFX9-NEXT: s_add_u32 s19, s19, s21
; GFX9-NEXT: s_cselect_b32 s21, 1, 0
-; GFX9-NEXT: s_lshl_b32 s21, s21, 31
-; GFX9-NEXT: s_lshr_b32 s21, s21, 31
+; GFX9-NEXT: s_and_b32 s21, s21, 1
; GFX9-NEXT: s_mul_hi_u32 s22, s1, s8
; GFX9-NEXT: s_add_i32 s20, s20, s21
; GFX9-NEXT: s_add_u32 s19, s19, s22
; GFX9-NEXT: s_cselect_b32 s21, 1, 0
-; GFX9-NEXT: s_lshl_b32 s21, s21, 31
-; GFX9-NEXT: s_lshr_b32 s21, s21, 31
-; GFX9-NEXT: s_mul_hi_u32 s23, s16, s9
+; GFX9-NEXT: s_and_b32 s21, s21, 1
+; GFX9-NEXT: s_mul_hi_u32 s23, s0, s9
; GFX9-NEXT: s_add_i32 s20, s20, s21
; GFX9-NEXT: s_add_u32 s19, s19, s23
; GFX9-NEXT: s_cselect_b32 s21, 1, 0
-; GFX9-NEXT: s_lshl_b32 s21, s21, 31
-; GFX9-NEXT: s_lshr_b32 s21, s21, 31
+; GFX9-NEXT: s_and_b32 s21, s21, 1
; GFX9-NEXT: s_add_i32 s20, s20, s21
; GFX9-NEXT: s_add_u32 s18, s19, s18
; GFX9-NEXT: s_cselect_b32 s19, 1, 0
-; GFX9-NEXT: s_lshl_b32 s19, s19, 31
-; GFX9-NEXT: s_lshr_b32 s19, s19, 31
+; GFX9-NEXT: s_and_b32 s19, s19, 1
; GFX9-NEXT: s_add_i32 s20, s20, s19
; GFX9-NEXT: s_mul_i32 s19, s3, s8
; GFX9-NEXT: s_mul_i32 s21, s2, s9
; GFX9-NEXT: s_add_u32 s19, s19, s21
; GFX9-NEXT: s_cselect_b32 s21, 1, 0
-; GFX9-NEXT: s_lshl_b32 s21, s21, 31
; GFX9-NEXT: s_mul_i32 s22, s1, s10
-; GFX9-NEXT: s_lshr_b32 s21, s21, 31
+; GFX9-NEXT: s_and_b32 s21, s21, 1
; GFX9-NEXT: s_add_u32 s19, s19, s22
; GFX9-NEXT: s_cselect_b32 s22, 1, 0
-; GFX9-NEXT: s_lshl_b32 s22, s22, 31
-; GFX9-NEXT: s_lshr_b32 s22, s22, 31
-; GFX9-NEXT: s_mul_i32 s23, s16, s11
+; GFX9-NEXT: s_and_b32 s22, s22, 1
+; GFX9-NEXT: s_mul_i32 s23, s0, s11
; GFX9-NEXT: s_add_i32 s21, s21, s22
; GFX9-NEXT: s_add_u32 s19, s19, s23
; GFX9-NEXT: s_cselect_b32 s22, 1, 0
-; GFX9-NEXT: s_lshl_b32 s22, s22, 31
-; GFX9-NEXT: s_lshr_b32 s22, s22, 31
+; GFX9-NEXT: s_and_b32 s22, s22, 1
; GFX9-NEXT: s_mul_hi_u32 s24, s2, s8
; GFX9-NEXT: s_add_i32 s21, s21, s22
; GFX9-NEXT: s_add_u32 s19, s19, s24
; GFX9-NEXT: s_cselect_b32 s22, 1, 0
-; GFX9-NEXT: s_lshl_b32 s22, s22, 31
-; GFX9-NEXT: s_lshr_b32 s22, s22, 31
+; GFX9-NEXT: s_and_b32 s22, s22, 1
; GFX9-NEXT: s_mul_hi_u32 s25, s1, s9
; GFX9-NEXT: s_add_i32 s21, s21, s22
; GFX9-NEXT: s_add_u32 s19, s19, s25
; GFX9-NEXT: s_cselect_b32 s22, 1, 0
-; GFX9-NEXT: s_lshl_b32 s22, s22, 31
-; GFX9-NEXT: s_lshr_b32 s22, s22, 31
-; GFX9-NEXT: s_mul_hi_u32 s26, s16, s10
+; GFX9-NEXT: s_and_b32 s22, s22, 1
+; GFX9-NEXT: s_mul_hi_u32 s26, s0, s10
; GFX9-NEXT: s_add_i32 s21, s21, s22
; GFX9-NEXT: s_add_u32 s19, s19, s26
; GFX9-NEXT: s_cselect_b32 s22, 1, 0
-; GFX9-NEXT: s_lshl_b32 s22, s22, 31
-; GFX9-NEXT: s_lshr_b32 s22, s22, 31
+; GFX9-NEXT: s_and_b32 s22, s22, 1
; GFX9-NEXT: s_add_i32 s21, s21, s22
; GFX9-NEXT: s_add_u32 s19, s19, s20
; GFX9-NEXT: s_cselect_b32 s20, 1, 0
-; GFX9-NEXT: s_lshl_b32 s20, s20, 31
-; GFX9-NEXT: s_lshr_b32 s20, s20, 31
+; GFX9-NEXT: s_and_b32 s20, s20, 1
; GFX9-NEXT: s_add_i32 s21, s21, s20
; GFX9-NEXT: s_mul_i32 s20, s4, s8
; GFX9-NEXT: s_mul_i32 s22, s3, s9
; GFX9-NEXT: s_add_u32 s20, s20, s22
; GFX9-NEXT: s_cselect_b32 s22, 1, 0
-; GFX9-NEXT: s_lshl_b32 s22, s22, 31
; GFX9-NEXT: s_mul_i32 s23, s2, s10
-; GFX9-NEXT: s_lshr_b32 s22, s22, 31
+; GFX9-NEXT: s_and_b32 s22, s22, 1
; GFX9-NEXT: s_add_u32 s20, s20, s23
; GFX9-NEXT: s_cselect_b32 s23, 1, 0
-; GFX9-NEXT: s_lshl_b32 s23, s23, 31
-; GFX9-NEXT: s_lshr_b32 s23, s23, 31
+; GFX9-NEXT: s_and_b32 s23, s23, 1
; GFX9-NEXT: s_mul_i32 s24, s1, s11
; GFX9-NEXT: s_add_i32 s22, s22, s23
; GFX9-NEXT: s_add_u32 s20, s20, s24
; GFX9-NEXT: s_cselect_b32 s23, 1, 0
-; GFX9-NEXT: s_lshl_b32 s23, s23, 31
-; GFX9-NEXT: s_lshr_b32 s23, s23, 31
-; GFX9-NEXT: s_mul_i32 s25, s16, s12
+; GFX9-NEXT: s_and_b32 s23, s23, 1
+; GFX9-NEXT: s_mul_i32 s25, s0, s12
; GFX9-NEXT: s_add_i32 s22, s22, s23
; GFX9-NEXT: s_add_u32 s20, s20, s25
; GFX9-NEXT: s_cselect_b32 s23, 1, 0
-; GFX9-NEXT: s_lshl_b32 s23, s23, 31
-; GFX9-NEXT: s_lshr_b32 s23, s23, 31
+; GFX9-NEXT: s_and_b32 s23, s23, 1
; GFX9-NEXT: s_mul_hi_u32 s26, s3, s8
; GFX9-NEXT: s_add_i32 s22, s22, s23
; GFX9-NEXT: s_add_u32 s20, s20, s26
; GFX9-NEXT: s_cselect_b32 s23, 1, 0
-; GFX9-NEXT: s_lshl_b32 s23, s23, 31
-; GFX9-NEXT: s_lshr_b32 s23, s23, 31
+; GFX9-NEXT: s_and_b32 s23, s23, 1
; GFX9-NEXT: s_mul_hi_u32 s27, s2, s9
; GFX9-NEXT: s_add_i32 s22, s22, s23
; GFX9-NEXT: s_add_u32 s20, s20, s27
; GFX9-NEXT: s_cselect_b32 s23, 1, 0
-; GFX9-NEXT: s_lshl_b32 s23, s23, 31
-; GFX9-NEXT: s_lshr_b32 s23, s23, 31
+; GFX9-NEXT: s_and_b32 s23, s23, 1
; GFX9-NEXT: s_mul_hi_u32 s28, s1, s10
; GFX9-NEXT: s_add_i32 s22, s22, s23
; GFX9-NEXT: s_add_u32 s20, s20, s28
; GFX9-NEXT: s_cselect_b32 s23, 1, 0
-; GFX9-NEXT: s_lshl_b32 s23, s23, 31
-; GFX9-NEXT: s_lshr_b32 s23, s23, 31
-; GFX9-NEXT: s_mul_hi_u32 s29, s16, s11
+; GFX9-NEXT: s_and_b32 s23, s23, 1
+; GFX9-NEXT: s_mul_hi_u32 s29, s0, s11
; GFX9-NEXT: s_add_i32 s22, s22, s23
; GFX9-NEXT: s_add_u32 s20, s20, s29
; GFX9-NEXT: s_cselect_b32 s23, 1, 0
-; GFX9-NEXT: s_lshl_b32 s23, s23, 31
-; GFX9-NEXT: s_lshr_b32 s23, s23, 31
+; GFX9-NEXT: s_and_b32 s23, s23, 1
; GFX9-NEXT: s_add_i32 s22, s22, s23
; GFX9-NEXT: s_add_u32 s20, s20, s21
; GFX9-NEXT: s_cselect_b32 s21, 1, 0
-; GFX9-NEXT: s_lshl_b32 s21, s21, 31
-; GFX9-NEXT: s_lshr_b32 s21, s21, 31
+; GFX9-NEXT: s_and_b32 s21, s21, 1
; GFX9-NEXT: s_add_i32 s22, s22, s21
; GFX9-NEXT: s_mul_i32 s21, s5, s8
; GFX9-NEXT: s_mul_i32 s23, s4, s9
; GFX9-NEXT: s_add_u32 s21, s21, s23
; GFX9-NEXT: s_cselect_b32 s23, 1, 0
-; GFX9-NEXT: s_lshl_b32 s23, s23, 31
; GFX9-NEXT: s_mul_i32 s24, s3, s10
-; GFX9-NEXT: s_lshr_b32 s23, s23, 31
+; GFX9-NEXT: s_and_b32 s23, s23, 1
; GFX9-NEXT: s_add_u32 s21, s21, s24
; GFX9-NEXT: s_cselect_b32 s24, 1, 0
-; GFX9-NEXT: s_lshl_b32 s24, s24, 31
-; GFX9-NEXT: s_lshr_b32 s24, s24, 31
+; GFX9-NEXT: s_and_b32 s24, s24, 1
; GFX9-NEXT: s_mul_i32 s25, s2, s11
; GFX9-NEXT: s_add_i32 s23, s23, s24
; GFX9-NEXT: s_add_u32 s21, s21, s25
; GFX9-NEXT: s_cselect_b32 s24, 1, 0
-; GFX9-NEXT: s_lshl_b32 s24, s24, 31
-; GFX9-NEXT: s_lshr_b32 s24, s24, 31
+; GFX9-NEXT: s_and_b32 s24, s24, 1
; GFX9-NEXT: s_mul_i32 s26, s1, s12
; GFX9-NEXT: s_add_i32 s23, s23, s24
; GFX9-NEXT: s_add_u32 s21, s21, s26
; GFX9-NEXT: s_cselect_b32 s24, 1, 0
-; GFX9-NEXT: s_lshl_b32 s24, s24, 31
-; GFX9-NEXT: s_lshr_b32 s24, s24, 31
-; GFX9-NEXT: s_mul_i32 s27, s16, s13
+; GFX9-NEXT: s_and_b32 s24, s24, 1
+; GFX9-NEXT: s_mul_i32 s27, s0, s13
; GFX9-NEXT: s_add_i32 s23, s23, s24
; GFX9-NEXT: s_add_u32 s21, s21, s27
; GFX9-NEXT: s_cselect_b32 s24, 1, 0
-; GFX9-NEXT: s_lshl_b32 s24, s24, 31
-; GFX9-NEXT: s_lshr_b32 s24, s24, 31
+; GFX9-NEXT: s_and_b32 s24, s24, 1
; GFX9-NEXT: s_mul_hi_u32 s28, s4, s8
; GFX9-NEXT: s_add_i32 s23, s23, s24
; GFX9-NEXT: s_add_u32 s21, s21, s28
; GFX9-NEXT: s_cselect_b32 s24, 1, 0
-; GFX9-NEXT: s_lshl_b32 s24, s24, 31
-; GFX9-NEXT: s_lshr_b32 s24, s24, 31
+; GFX9-NEXT: s_and_b32 s24, s24, 1
; GFX9-NEXT: s_mul_hi_u32 s29, s3, s9
; GFX9-NEXT: s_add_i32 s23, s23, s24
; GFX9-NEXT: s_add_u32 s21, s21, s29
; GFX9-NEXT: s_cselect_b32 s24, 1, 0
-; GFX9-NEXT: s_lshl_b32 s24, s24, 31
-; GFX9-NEXT: s_lshr_b32 s24, s24, 31
+; GFX9-NEXT: s_and_b32 s24, s24, 1
; GFX9-NEXT: s_mul_hi_u32 s30, s2, s10
; GFX9-NEXT: s_add_i32 s23, s23, s24
; GFX9-NEXT: s_add_u32 s21, s21, s30
; GFX9-NEXT: s_cselect_b32 s24, 1, 0
-; GFX9-NEXT: s_lshl_b32 s24, s24, 31
-; GFX9-NEXT: s_lshr_b32 s24, s24, 31
+; GFX9-NEXT: s_and_b32 s24, s24, 1
; GFX9-NEXT: s_mul_hi_u32 s31, s1, s11
; GFX9-NEXT: s_add_i32 s23, s23, s24
; GFX9-NEXT: s_add_u32 s21, s21, s31
; GFX9-NEXT: s_cselect_b32 s24, 1, 0
-; GFX9-NEXT: s_lshl_b32 s24, s24, 31
-; GFX9-NEXT: s_lshr_b32 s24, s24, 31
-; GFX9-NEXT: s_mul_hi_u32 s32, s16, s12
+; GFX9-NEXT: s_and_b32 s24, s24, 1
+; GFX9-NEXT: s_mul_hi_u32 s32, s0, s12
; GFX9-NEXT: s_add_i32 s23, s23, s24
; GFX9-NEXT: s_add_u32 s21, s21, s32
; GFX9-NEXT: s_cselect_b32 s24, 1, 0
-; GFX9-NEXT: s_lshl_b32 s24, s24, 31
-; GFX9-NEXT: s_lshr_b32 s24, s24, 31
+; GFX9-NEXT: s_and_b32 s24, s24, 1
; GFX9-NEXT: s_add_i32 s23, s23, s24
; GFX9-NEXT: s_add_u32 s21, s21, s22
; GFX9-NEXT: s_cselect_b32 s22, 1, 0
-; GFX9-NEXT: s_lshl_b32 s22, s22, 31
-; GFX9-NEXT: s_lshr_b32 s22, s22, 31
+; GFX9-NEXT: s_and_b32 s22, s22, 1
; GFX9-NEXT: s_add_i32 s23, s23, s22
; GFX9-NEXT: s_mul_i32 s22, s6, s8
; GFX9-NEXT: s_mul_i32 s24, s5, s9
; GFX9-NEXT: s_add_u32 s22, s22, s24
; GFX9-NEXT: s_cselect_b32 s24, 1, 0
-; GFX9-NEXT: s_lshl_b32 s24, s24, 31
; GFX9-NEXT: s_mul_i32 s25, s4, s10
-; GFX9-NEXT: s_lshr_b32 s24, s24, 31
+; GFX9-NEXT: s_and_b32 s24, s24, 1
; GFX9-NEXT: s_add_u32 s22, s22, s25
; GFX9-NEXT: s_cselect_b32 s25, 1, 0
-; GFX9-NEXT: s_lshl_b32 s25, s25, 31
-; GFX9-NEXT: s_lshr_b32 s25, s25, 31
+; GFX9-NEXT: s_and_b32 s25, s25, 1
; GFX9-NEXT: s_mul_i32 s26, s3, s11
; GFX9-NEXT: s_add_i32 s24, s24, s25
; GFX9-NEXT: s_add_u32 s22, s22, s26
; GFX9-NEXT: s_cselect_b32 s25, 1, 0
-; GFX9-NEXT: s_lshl_b32 s25, s25, 31
-; GFX9-NEXT: s_lshr_b32 s25, s25, 31
+; GFX9-NEXT: s_and_b32 s25, s25, 1
; GFX9-NEXT: s_mul_i32 s27, s2, s12
; GFX9-NEXT: s_add_i32 s24, s24, s25
; GFX9-NEXT: s_add_u32 s22, s22, s27
; GFX9-NEXT: s_cselect_b32 s25, 1, 0
-; GFX9-NEXT: s_lshl_b32 s25, s25, 31
-; GFX9-NEXT: s_lshr_b32 s25, s25, 31
+; GFX9-NEXT: s_and_b32 s25, s25, 1
; GFX9-NEXT: s_mul_i32 s28, s1, s13
; GFX9-NEXT: s_add_i32 s24, s24, s25
; GFX9-NEXT: s_add_u32 s22, s22, s28
; GFX9-NEXT: s_cselect_b32 s25, 1, 0
-; GFX9-NEXT: s_lshl_b32 s25, s25, 31
-; GFX9-NEXT: s_lshr_b32 s25, s25, 31
-; GFX9-NEXT: s_mul_i32 s29, s16, s14
+; GFX9-NEXT: s_and_b32 s25, s25, 1
+; GFX9-NEXT: s_mul_i32 s29, s0, s14
; GFX9-NEXT: s_add_i32 s24, s24, s25
; GFX9-NEXT: s_add_u32 s22, s22, s29
; GFX9-NEXT: s_cselect_b32 s25, 1, 0
-; GFX9-NEXT: s_lshl_b32 s25, s25, 31
-; GFX9-NEXT: s_lshr_b32 s25, s25, 31
+; GFX9-NEXT: s_and_b32 s25, s25, 1
; GFX9-NEXT: s_mul_hi_u32 s30, s5, s8
; GFX9-NEXT: s_add_i32 s24, s24, s25
; GFX9-NEXT: s_add_u32 s22, s22, s30
; GFX9-NEXT: s_cselect_b32 s25, 1, 0
-; GFX9-NEXT: s_lshl_b32 s25, s25, 31
-; GFX9-NEXT: s_lshr_b32 s25, s25, 31
+; GFX9-NEXT: s_and_b32 s25, s25, 1
; GFX9-NEXT: s_mul_hi_u32 s31, s4, s9
; GFX9-NEXT: s_add_i32 s24, s24, s25
; GFX9-NEXT: s_add_u32 s22, s22, s31
; GFX9-NEXT: s_cselect_b32 s25, 1, 0
-; GFX9-NEXT: s_lshl_b32 s25, s25, 31
-; GFX9-NEXT: s_lshr_b32 s25, s25, 31
+; GFX9-NEXT: s_and_b32 s25, s25, 1
; GFX9-NEXT: s_mul_hi_u32 s32, s3, s10
; GFX9-NEXT: s_add_i32 s24, s24, s25
; GFX9-NEXT: s_add_u32 s22, s22, s32
; GFX9-NEXT: s_cselect_b32 s25, 1, 0
-; GFX9-NEXT: s_lshl_b32 s25, s25, 31
-; GFX9-NEXT: s_lshr_b32 s25, s25, 31
+; GFX9-NEXT: s_and_b32 s25, s25, 1
; GFX9-NEXT: s_mul_hi_u32 s33, s2, s11
; GFX9-NEXT: s_add_i32 s24, s24, s25
; GFX9-NEXT: s_add_u32 s22, s22, s33
; GFX9-NEXT: s_cselect_b32 s25, 1, 0
-; GFX9-NEXT: s_lshl_b32 s25, s25, 31
-; GFX9-NEXT: s_lshr_b32 s25, s25, 31
+; GFX9-NEXT: s_and_b32 s25, s25, 1
; GFX9-NEXT: s_mul_hi_u32 s34, s1, s12
; GFX9-NEXT: s_add_i32 s24, s24, s25
; GFX9-NEXT: s_add_u32 s22, s22, s34
; GFX9-NEXT: s_cselect_b32 s25, 1, 0
-; GFX9-NEXT: s_lshl_b32 s25, s25, 31
-; GFX9-NEXT: s_lshr_b32 s25, s25, 31
-; GFX9-NEXT: s_mul_hi_u32 s35, s16, s13
+; GFX9-NEXT: s_and_b32 s25, s25, 1
+; GFX9-NEXT: s_mul_hi_u32 s35, s0, s13
; GFX9-NEXT: s_add_i32 s24, s24, s25
; GFX9-NEXT: s_add_u32 s22, s22, s35
; GFX9-NEXT: s_cselect_b32 s25, 1, 0
-; GFX9-NEXT: s_lshl_b32 s25, s25, 31
-; GFX9-NEXT: s_lshr_b32 s25, s25, 31
+; GFX9-NEXT: s_and_b32 s25, s25, 1
; GFX9-NEXT: s_add_i32 s24, s24, s25
; GFX9-NEXT: s_add_u32 s22, s22, s23
; GFX9-NEXT: s_cselect_b32 s23, 1, 0
-; GFX9-NEXT: s_lshl_b32 s23, s23, 31
-; GFX9-NEXT: s_lshr_b32 s23, s23, 31
+; GFX9-NEXT: s_and_b32 s23, s23, 1
; GFX9-NEXT: s_add_i32 s24, s24, s23
; GFX9-NEXT: s_mul_i32 s23, s6, s9
; GFX9-NEXT: s_mul_i32 s7, s7, s8
@@ -1652,7 +1545,7 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) {
; GFX9-NEXT: s_add_i32 s7, s7, s27
; GFX9-NEXT: s_mul_i32 s29, s1, s14
; GFX9-NEXT: s_add_i32 s7, s7, s28
-; GFX9-NEXT: s_mul_i32 s15, s16, s15
+; GFX9-NEXT: s_mul_i32 s15, s0, s15
; GFX9-NEXT: s_add_i32 s7, s7, s29
; GFX9-NEXT: s_mul_hi_u32 s6, s6, s8
; GFX9-NEXT: s_add_i32 s7, s7, s15
@@ -1666,11 +1559,12 @@ define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) {
; GFX9-NEXT: s_mul_hi_u32 s2, s2, s12
; GFX9-NEXT: s_add_i32 s2, s3, s2
; GFX9-NEXT: s_mul_hi_u32 s1, s1, s13
-; GFX9-NEXT: s_mul_i32 s0, s0, s8
+; GFX9-NEXT: s_mul_i32 s16, s0, s8
; GFX9-NEXT: s_add_i32 s1, s2, s1
-; GFX9-NEXT: s_mul_hi_u32 s8, s16, s14
-; GFX9-NEXT: s_add_i32 s1, s1, s8
-; GFX9-NEXT: s_add_i32 s7, s1, s24
+; GFX9-NEXT: s_mul_hi_u32 s0, s0, s14
+; GFX9-NEXT: s_add_i32 s0, s1, s0
+; GFX9-NEXT: s_add_i32 s7, s0, s24
+; GFX9-NEXT: s_mov_b32 s0, s16
; GFX9-NEXT: s_mov_b32 s1, s17
; GFX9-NEXT: s_mov_b32 s2, s18
; GFX9-NEXT: s_mov_b32 s3, s19
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir
index 28d5b16cc91b..5c2bc3e93112 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir
@@ -22,10 +22,12 @@ legalized: true
body: |
bb.0:
- liveins: $vgpr0_vgpr1
+ liveins: $vgpr0
; CHECK-LABEL: name: anyext_s32_to_s64_v
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s64) = G_ANYEXT [[COPY]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[DEF]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s64) = G_ANYEXT %0
...
@@ -98,7 +100,10 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
- ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s16) = G_ANYEXT [[ICMP]](s1)
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+ ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[SELECT]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -116,7 +121,9 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
- ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[ICMP]](s1)
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -134,7 +141,11 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
- ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s64) = G_ANYEXT [[ICMP]](s1)
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[COPY2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -231,7 +242,9 @@ body: |
; CHECK-LABEL: name: anyext_s1_to_s64_vgpr
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s64) = G_ANYEXT [[TRUNC]](s1)
+ ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1)
+ ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[ANYEXT]](s32), [[DEF]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s64) = G_ANYEXT %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
index 0f37d7710280..fec347169d0c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
@@ -61,10 +61,7 @@ body: |
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s16) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15
- ; CHECK: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[SHL]], [[C]](s32)
+ ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s16) = G_SEXT [[TRUNC]](s1)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -83,10 +80,7 @@ body: |
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31
- ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[SHL]], [[C]](s32)
+ ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s1)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -105,10 +99,7 @@ body: |
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s64) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
- ; CHECK: [[SHL:%[0-9]+]]:sgpr(s64) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[ASHR:%[0-9]+]]:sgpr(s64) = G_ASHR [[SHL]], [[C]](s32)
+ ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s64) = G_SEXT [[TRUNC]](s1)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -188,10 +179,7 @@ body: |
; CHECK-LABEL: name: sext_s1_to_s16_sgpr
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s16) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15
- ; CHECK: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[SHL]], [[C]](s32)
+ ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s16) = G_SEXT [[TRUNC]](s1)
%0:_(s32) = COPY $sgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s16) = G_SEXT %1
@@ -207,10 +195,7 @@ body: |
; CHECK-LABEL: name: sext_s1_to_s32_sgpr
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31
- ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[SHL]], [[C]](s32)
+ ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s1)
%0:_(s32) = COPY $sgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s32) = G_SEXT %1
@@ -226,10 +211,7 @@ body: |
; CHECK-LABEL: name: sext_s1_to_s64_sgpr
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s64) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
- ; CHECK: [[SHL:%[0-9]+]]:sgpr(s64) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[ASHR:%[0-9]+]]:sgpr(s64) = G_ASHR [[SHL]], [[C]](s32)
+ ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s64) = G_SEXT [[TRUNC]](s1)
%0:_(s32) = COPY $sgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s64) = G_SEXT %1
@@ -245,10 +227,7 @@ body: |
; CHECK-LABEL: name: sext_s1_to_s16_vgpr
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s16) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 15
- ; CHECK: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[SHL]], [[C]](s32)
+ ; CHECK: [[SEXT:%[0-9]+]]:vgpr(s16) = G_SEXT [[TRUNC]](s1)
%0:_(s32) = COPY $vgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s16) = G_SEXT %1
@@ -264,10 +243,7 @@ body: |
; CHECK-LABEL: name: sext_s1_to_s32_vgpr
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
- ; CHECK: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SHL]], [[C]](s32)
+ ; CHECK: [[SEXT:%[0-9]+]]:vgpr(s32) = G_SEXT [[TRUNC]](s1)
%0:_(s32) = COPY $vgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s32) = G_SEXT %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
index ee22c54205d7..ef83a4c6c529 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
@@ -60,10 +60,7 @@ body: |
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s16) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15
- ; CHECK: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[SHL]], [[C]](s32)
+ ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s16) = G_ZEXT [[TRUNC]](s1)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -82,10 +79,7 @@ body: |
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31
- ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[SHL]], [[C]](s32)
+ ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -104,10 +98,7 @@ body: |
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s64) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
- ; CHECK: [[SHL:%[0-9]+]]:sgpr(s64) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[LSHR:%[0-9]+]]:sgpr(s64) = G_LSHR [[SHL]], [[C]](s32)
+ ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s64) = G_ZEXT [[TRUNC]](s1)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -187,10 +178,7 @@ body: |
; CHECK-LABEL: name: zext_s1_to_s16_sgpr
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s16) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 15
- ; CHECK: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[SHL]], [[C]](s32)
+ ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s16) = G_ZEXT [[TRUNC]](s1)
%0:_(s32) = COPY $sgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s16) = G_ZEXT %1
@@ -206,10 +194,7 @@ body: |
; CHECK-LABEL: name: zext_s1_to_s32_sgpr
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31
- ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[SHL]], [[C]](s32)
+ ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1)
%0:_(s32) = COPY $sgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s32) = G_ZEXT %1
@@ -225,10 +210,7 @@ body: |
; CHECK-LABEL: name: zext_s1_to_s64_sgpr
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:sgpr(s64) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63
- ; CHECK: [[SHL:%[0-9]+]]:sgpr(s64) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[LSHR:%[0-9]+]]:sgpr(s64) = G_LSHR [[SHL]], [[C]](s32)
+ ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s64) = G_ZEXT [[TRUNC]](s1)
%0:_(s32) = COPY $sgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s64) = G_ZEXT %1
@@ -244,10 +226,7 @@ body: |
; CHECK-LABEL: name: zext_s1_to_s16_vgpr
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s16) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 15
- ; CHECK: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[SHL]], [[C]](s32)
+ ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s16) = G_ZEXT [[TRUNC]](s1)
%0:_(s32) = COPY $vgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s16) = G_ZEXT %1
@@ -263,10 +242,7 @@ body: |
; CHECK-LABEL: name: zext_s1_to_s32_vgpr
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
- ; CHECK: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[LSHR:%[0-9]+]]:vgpr(s32) = G_LSHR [[SHL]], [[C]](s32)
+ ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s1)
%0:_(s32) = COPY $vgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s32) = G_ZEXT %1