[llvm] dc4ca0d - [GlobalISel] Constant fold G_SITOFP and G_UITOFP in CSEMIRBuilder

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 27 03:28:07 PDT 2021


Author: Jay Foad
Date: 2021-07-27T11:27:58+01:00
New Revision: dc4ca0dbbcef5eb957efbd2e08a197b4aae3d909

URL: https://github.com/llvm/llvm-project/commit/dc4ca0dbbcef5eb957efbd2e08a197b4aae3d909
DIFF: https://github.com/llvm/llvm-project/commit/dc4ca0dbbcef5eb957efbd2e08a197b4aae3d909.diff

LOG: [GlobalISel] Constant fold G_SITOFP and G_UITOFP in CSEMIRBuilder

Differential Revision: https://reviews.llvm.org/D104528
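
For context: with this change, building a G_SITOFP or G_UITOFP through the
CSE-aware builder constant-folds when the source operand is a known integer
constant, so the converted value is emitted directly as a G_FCONSTANT. A
rough MIR-level sketch (illustrative only, not taken from the patch; the
virtual register numbers are made up):

    %0:_(s32) = G_CONSTANT i32 2
    %1:_(s32) = G_UITOFP %0(s32)

now builds as

    %1:_(s32) = G_FCONSTANT float 2.000000e+00

which is why the updated AMDGPU tests below drop their v_cvt_f32_* conversion
instructions in favour of inline floating-point constants.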

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/Utils.h
    llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
    llvm/lib/CodeGen/GlobalISel/Utils.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index e151c65dcee6d..818475a48abb1 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -257,6 +257,10 @@ Optional<APFloat> ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
 Optional<APInt> ConstantFoldExtOp(unsigned Opcode, const Register Op1,
                                   uint64_t Imm, const MachineRegisterInfo &MRI);
 
+Optional<APFloat> ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
+                                         Register Src,
+                                         const MachineRegisterInfo &MRI);
+
 /// Test if the given value is known to have exactly one bit set. This differs
 /// from computeKnownBits in that it doesn't necessarily determine which bit is
 /// set.

diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 82942b9476dd1..dd560e8ff145a 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -203,6 +203,16 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
       return buildConstant(Dst, *MaybeCst);
     break;
   }
+  case TargetOpcode::G_SITOFP:
+  case TargetOpcode::G_UITOFP: {
+    // Try to constant fold these.
+    assert(SrcOps.size() == 1 && "Invalid sources");
+    assert(DstOps.size() == 1 && "Invalid dsts");
+    if (Optional<APFloat> Cst = ConstantFoldIntToFloat(
+            Opc, DstOps[0].getLLTTy(*getMRI()), SrcOps[0].getReg(), *getMRI()))
+      return buildFConstant(DstOps[0], *Cst);
+    break;
+  }
   }
   bool CanCopy = checkCopyToDefsPossible(DstOps);
   if (!canPerformCSEForOpc(Opc))

diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 31b8347a0aebb..f64e41b9dccce 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -673,6 +673,19 @@ Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1,
   return None;
 }
 
+Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
+                                               Register Src,
+                                               const MachineRegisterInfo &MRI) {
+  assert(Opcode == TargetOpcode::G_SITOFP || Opcode == TargetOpcode::G_UITOFP);
+  if (auto MaybeSrcVal = getConstantVRegVal(Src, MRI)) {
+    APFloat DstVal(getFltSemanticForLLT(DstTy));
+    DstVal.convertFromAPInt(*MaybeSrcVal, Opcode == TargetOpcode::G_SITOFP,
+                            APFloat::rmNearestTiesToEven);
+    return DstVal;
+  }
+  return None;
+}
+
 bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
                                   GISelKnownBits *KB) {
   Optional<DefinitionAndSourceRegister> DefSrcReg =

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
index f3a53fb7d22d5..bd5ec1685ab28 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
@@ -689,8 +689,7 @@ define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrs
 define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: simplify_demanded_bfe_sdiv:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v0, 2
-; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, 2.0
 ; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
 ; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
 ; GFX6-NEXT:    s_mov_b32 s6, -1

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
index 5923bbdc65073..8ebcec4ba95a8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
@@ -68,9 +68,8 @@ define float @v_powi_neg1_f32(float %l) {
 ; GCN-LABEL: v_powi_neg1_f32:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_cvt_f32_i32_e32 v1, -1
 ; GCN-NEXT:    v_log_f32_e32 v0, v0
-; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
+; GCN-NEXT:    v_mul_legacy_f32_e32 v0, -1.0, v0
 ; GCN-NEXT:    v_exp_f32_e32 v0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %res = call float @llvm.powi.f32.i32(float %l, i32 -1)
@@ -82,8 +81,7 @@ define float @v_powi_2_f32(float %l) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_log_f32_e32 v0, v0
-; GCN-NEXT:    v_cvt_f32_ubyte0_e32 v1, 2
-; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
+; GCN-NEXT:    v_mul_legacy_f32_e32 v0, 2.0, v0
 ; GCN-NEXT:    v_exp_f32_e32 v0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %res = call float @llvm.powi.f32.i32(float %l, i32 2)
@@ -94,9 +92,8 @@ define float @v_powi_neg2_f32(float %l) {
 ; GCN-LABEL: v_powi_neg2_f32:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_cvt_f32_i32_e32 v1, -2
 ; GCN-NEXT:    v_log_f32_e32 v0, v0
-; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
+; GCN-NEXT:    v_mul_legacy_f32_e32 v0, -2.0, v0
 ; GCN-NEXT:    v_exp_f32_e32 v0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %res = call float @llvm.powi.f32.i32(float %l, i32 -2)
@@ -108,8 +105,7 @@ define float @v_powi_4_f32(float %l) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_log_f32_e32 v0, v0
-; GCN-NEXT:    v_cvt_f32_ubyte0_e32 v1, 4
-; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
+; GCN-NEXT:    v_mul_legacy_f32_e32 v0, 4.0, v0
 ; GCN-NEXT:    v_exp_f32_e32 v0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %res = call float @llvm.powi.f32.i32(float %l, i32 4)
@@ -121,8 +117,7 @@ define float @v_powi_8_f32(float %l) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_log_f32_e32 v0, v0
-; GCN-NEXT:    v_cvt_f32_ubyte0_e32 v1, 8
-; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
+; GCN-NEXT:    v_mul_legacy_f32_e32 v0, 0x41000000, v0
 ; GCN-NEXT:    v_exp_f32_e32 v0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %res = call float @llvm.powi.f32.i32(float %l, i32 8)
@@ -134,8 +129,7 @@ define float @v_powi_16_f32(float %l) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_log_f32_e32 v0, v0
-; GCN-NEXT:    v_cvt_f32_ubyte0_e32 v1, 16
-; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
+; GCN-NEXT:    v_mul_legacy_f32_e32 v0, 0x41800000, v0
 ; GCN-NEXT:    v_exp_f32_e32 v0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %res = call float @llvm.powi.f32.i32(float %l, i32 16)
@@ -147,8 +141,7 @@ define float @v_powi_128_f32(float %l) {
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_log_f32_e32 v0, v0
-; GCN-NEXT:    v_cvt_f32_ubyte0_e32 v1, 0x80
-; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
+; GCN-NEXT:    v_mul_legacy_f32_e32 v0, 0x43000000, v0
 ; GCN-NEXT:    v_exp_f32_e32 v0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %res = call float @llvm.powi.f32.i32(float %l, i32 128)
@@ -159,9 +152,8 @@ define float @v_powi_neg128_f32(float %l) {
 ; GCN-LABEL: v_powi_neg128_f32:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_cvt_f32_i32_e32 v1, 0xffffff80
 ; GCN-NEXT:    v_log_f32_e32 v0, v0
-; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
+; GCN-NEXT:    v_mul_legacy_f32_e32 v0, 0xc3000000, v0
 ; GCN-NEXT:    v_exp_f32_e32 v0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %res = call float @llvm.powi.f32.i32(float %l, i32 -128)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
index 8c534e8a06aff..af008f0cabdef 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
@@ -296,16 +296,15 @@ define i32 @v_sdiv_i32_pow2k_denom(i32 %num) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_movk_i32 s6, 0x1000
 ; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
-; CHECK-NEXT:    v_mov_b32_e32 v2, 0xfffff000
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, 0x45800000
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0xfffff000
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s6
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
-; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v3
-; CHECK-NEXT:    v_mul_hi_u32 v2, v3, v2
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v2
+; CHECK-NEXT:    v_mul_hi_u32 v3, v2, v3
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 12, v2
 ; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
@@ -379,56 +378,54 @@ define <2 x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) {
 ; CGP-LABEL: v_sdiv_v2i32_pow2k_denom:
 ; CGP:       ; %bb.0:
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    s_movk_i32 s4, 0x1000
+; CGP-NEXT:    s_movk_i32 s8, 0x1000
 ; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; CGP-NEXT:    v_mov_b32_e32 v3, 0x1000
-; CGP-NEXT:    s_movk_i32 s5, 0xf000
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, 0x45800000
+; CGP-NEXT:    s_movk_i32 s4, 0xf000
 ; CGP-NEXT:    v_mov_b32_e32 v4, 0xfffff000
-; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; CGP-NEXT:    v_mov_b32_e32 v5, 0x1000
+; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, 0x45800000
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_cvt_f32_u32_e32 v6, s4
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
-; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
-; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v6, v6
-; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, v7
-; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
+; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
 ; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
-; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT:    v_mul_lo_u32 v8, s5, v6
+; CGP-NEXT:    v_mul_lo_u32 v8, s4, v3
 ; CGP-NEXT:    v_mul_lo_u32 v4, v4, v7
-; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v3, v8
 ; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
-; CGP-NEXT:    v_mul_hi_u32 v6, v0, v6
+; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
 ; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
-; CGP-NEXT:    v_lshlrev_b32_e32 v7, 12, v6
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v6
+; CGP-NEXT:    v_lshlrev_b32_e32 v7, 12, v3
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v3
 ; CGP-NEXT:    v_lshlrev_b32_e32 v9, 12, v4
 ; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v4
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
-; CGP-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
-; CGP-NEXT:    v_subrev_i32_e64 v7, s[4:5], s4, v0
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
+; CGP-NEXT:    v_subrev_i32_e64 v7, s[4:5], s8, v0
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[4:5]
-; CGP-NEXT:    v_sub_i32_e64 v8, s[6:7], v1, v3
+; CGP-NEXT:    v_sub_i32_e64 v8, s[6:7], v1, v5
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v6
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v3
 ; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[4:5]
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v6, v7, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v3, v7, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v8, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = sdiv <2 x i32> %num, <i32 4096, i32 4096>
   ret <2 x i32> %result
@@ -440,16 +437,15 @@ define i32 @v_sdiv_i32_oddk_denom(i32 %num) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
 ; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
-; CHECK-NEXT:    v_mov_b32_e32 v2, 0xffed2705
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, 0x4996c7d8
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0xffed2705
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s6
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
-; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v3
-; CHECK-NEXT:    v_mul_hi_u32 v2, v3, v2
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v2
+; CHECK-NEXT:    v_mul_hi_u32 v3, v2, v3
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; CHECK-NEXT:    v_mul_lo_u32 v3, v2, s6
 ; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
@@ -523,56 +519,54 @@ define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) {
 ; CGP-LABEL: v_sdiv_v2i32_oddk_denom:
 ; CGP:       ; %bb.0:
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    s_mov_b32 s4, 0x12d8fb
+; CGP-NEXT:    s_mov_b32 s8, 0x12d8fb
 ; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; CGP-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
-; CGP-NEXT:    s_mov_b32 s5, 0xffed2705
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, 0x4996c7d8
+; CGP-NEXT:    s_mov_b32 s4, 0xffed2705
 ; CGP-NEXT:    v_mov_b32_e32 v4, 0xffed2705
-; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; CGP-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
+; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, 0x4996c7d8
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_cvt_f32_u32_e32 v6, s4
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
-; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
-; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v6, v6
-; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, v7
-; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
+; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
 ; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
-; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT:    v_mul_lo_u32 v8, s5, v6
+; CGP-NEXT:    v_mul_lo_u32 v8, s4, v3
 ; CGP-NEXT:    v_mul_lo_u32 v4, v4, v7
-; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v3, v8
 ; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
-; CGP-NEXT:    v_mul_hi_u32 v6, v0, v6
+; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
 ; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
-; CGP-NEXT:    v_mul_lo_u32 v7, v6, s4
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v6
-; CGP-NEXT:    v_mul_lo_u32 v9, v4, v3
+; CGP-NEXT:    v_mul_lo_u32 v7, v3, s8
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v3
+; CGP-NEXT:    v_mul_lo_u32 v9, v4, v5
 ; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v4
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
-; CGP-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
-; CGP-NEXT:    v_subrev_i32_e64 v7, s[4:5], s4, v0
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
+; CGP-NEXT:    v_subrev_i32_e64 v7, s[4:5], s8, v0
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v5
 ; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[4:5]
-; CGP-NEXT:    v_sub_i32_e64 v8, s[6:7], v1, v3
+; CGP-NEXT:    v_sub_i32_e64 v8, s[6:7], v1, v5
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v6
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v3
 ; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[4:5]
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v6, v7, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v3, v7, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v8, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = sdiv <2 x i32> %num, <i32 1235195, i32 1235195>
   ret <2 x i32> %result

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
index 4bd950c893114..a85173ac78df8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
@@ -272,16 +272,15 @@ define i32 @v_srem_i32_pow2k_denom(i32 %num) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_movk_i32 s4, 0x1000
 ; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
-; CHECK-NEXT:    v_mov_b32_e32 v2, 0xfffff000
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, 0x45800000
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0xfffff000
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s4
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
-; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v3
-; CHECK-NEXT:    v_mul_hi_u32 v2, v3, v2
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v2
+; CHECK-NEXT:    v_mul_hi_u32 v3, v2, v3
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 12, v2
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
@@ -351,50 +350,48 @@ define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CGP-NEXT:    s_movk_i32 s4, 0x1000
 ; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; CGP-NEXT:    v_mov_b32_e32 v3, 0x1000
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, 0x45800000
 ; CGP-NEXT:    s_movk_i32 s5, 0xf000
 ; CGP-NEXT:    v_mov_b32_e32 v4, 0xfffff000
-; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; CGP-NEXT:    v_mov_b32_e32 v5, 0x1000
+; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, 0x45800000
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_cvt_f32_u32_e32 v6, s4
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
-; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
-; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v6, v6
-; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, v7
-; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
+; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
 ; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
-; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT:    v_mul_lo_u32 v8, s5, v6
+; CGP-NEXT:    v_mul_lo_u32 v8, s5, v3
 ; CGP-NEXT:    v_mul_lo_u32 v4, v4, v7
-; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v3, v8
 ; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
-; CGP-NEXT:    v_mul_hi_u32 v6, v0, v6
+; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
 ; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
-; CGP-NEXT:    v_lshlrev_b32_e32 v6, 12, v6
+; CGP-NEXT:    v_lshlrev_b32_e32 v3, 12, v3
 ; CGP-NEXT:    v_lshlrev_b32_e32 v4, 12, v4
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
-; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
-; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v1, v3
+; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v1, v5
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
-; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
-; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v1, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v1, v5
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = srem <2 x i32> %num, <i32 4096, i32 4096>
   ret <2 x i32> %result
@@ -406,16 +403,15 @@ define i32 @v_srem_i32_oddk_denom(i32 %num) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_mov_b32 s4, 0x12d8fb
 ; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
-; CHECK-NEXT:    v_mov_b32_e32 v2, 0xffed2705
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, 0x4996c7d8
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0xffed2705
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s4
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
-; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v3
-; CHECK-NEXT:    v_mul_hi_u32 v2, v3, v2
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v2
+; CHECK-NEXT:    v_mul_hi_u32 v3, v2, v3
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; CHECK-NEXT:    v_mul_lo_u32 v2, v2, s4
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
@@ -485,50 +481,48 @@ define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CGP-NEXT:    s_mov_b32 s4, 0x12d8fb
 ; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; CGP-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, 0x4996c7d8
 ; CGP-NEXT:    s_mov_b32 s5, 0xffed2705
 ; CGP-NEXT:    v_mov_b32_e32 v4, 0xffed2705
-; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; CGP-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
+; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, 0x4996c7d8
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_cvt_f32_u32_e32 v6, s4
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
-; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
-; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v6, v6
-; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, v7
-; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
+; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
 ; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
-; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT:    v_mul_lo_u32 v8, s5, v6
+; CGP-NEXT:    v_mul_lo_u32 v8, s5, v3
 ; CGP-NEXT:    v_mul_lo_u32 v4, v4, v7
-; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v3, v8
 ; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
-; CGP-NEXT:    v_mul_hi_u32 v6, v0, v6
+; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
 ; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
-; CGP-NEXT:    v_mul_lo_u32 v6, v6, s4
-; CGP-NEXT:    v_mul_lo_u32 v4, v4, v3
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; CGP-NEXT:    v_mul_lo_u32 v3, v3, s4
+; CGP-NEXT:    v_mul_lo_u32 v4, v4, s4
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
-; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
-; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v1, v3
+; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v1, v5
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
-; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
-; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v1, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v1, v5
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = srem <2 x i32> %num, <i32 1235195, i32 1235195>
   ret <2 x i32> %result

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
index 66628a115253a..bd1bc4d0a25c9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
@@ -223,14 +223,13 @@ define i32 @v_udiv_i32_pow2k_denom(i32 %num) {
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_movk_i32 s6, 0x1000
-; CHECK-NEXT:    v_mov_b32_e32 v1, 0xfffff000
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s6
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
-; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v1, v2, v1
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, 0x45800000
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0xfffff000
+; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v1
+; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 12, v1
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
@@ -295,45 +294,43 @@ define <2 x i32> @v_udiv_v2i32_pow2k_denom(<2 x i32> %num) {
 ; CGP-LABEL: v_udiv_v2i32_pow2k_denom:
 ; CGP:       ; %bb.0:
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    s_movk_i32 s4, 0x1000
-; CGP-NEXT:    v_mov_b32_e32 v2, 0x1000
-; CGP-NEXT:    s_movk_i32 s5, 0xf000
+; CGP-NEXT:    s_movk_i32 s8, 0x1000
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, 0x45800000
+; CGP-NEXT:    s_movk_i32 s4, 0xf000
 ; CGP-NEXT:    v_mov_b32_e32 v3, 0xfffff000
-; CGP-NEXT:    v_cvt_f32_u32_e32 v4, s4
-; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v2
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v5
-; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
+; CGP-NEXT:    v_mov_b32_e32 v4, 0x1000
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, 0x45800000
+; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
 ; CGP-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
-; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; CGP-NEXT:    v_mul_lo_u32 v6, s5, v4
+; CGP-NEXT:    v_mul_lo_u32 v6, s4, v2
 ; CGP-NEXT:    v_mul_lo_u32 v3, v3, v5
-; CGP-NEXT:    v_mul_hi_u32 v6, v4, v6
+; CGP-NEXT:    v_mul_hi_u32 v6, v2, v6
 ; CGP-NEXT:    v_mul_hi_u32 v3, v5, v3
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
 ; CGP-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
-; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
+; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; CGP-NEXT:    v_mul_hi_u32 v3, v1, v3
-; CGP-NEXT:    v_lshlrev_b32_e32 v5, 12, v4
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
+; CGP-NEXT:    v_lshlrev_b32_e32 v5, 12, v2
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v2
 ; CGP-NEXT:    v_lshlrev_b32_e32 v7, 12, v3
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v3
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
-; CGP-NEXT:    v_subrev_i32_e64 v5, s[4:5], s4, v0
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v2
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; CGP-NEXT:    v_subrev_i32_e64 v5, s[4:5], s8, v0
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s8, v1
 ; CGP-NEXT:    v_cndmask_b32_e64 v3, v3, v8, s[4:5]
-; CGP-NEXT:    v_sub_i32_e64 v6, s[6:7], v1, v2
+; CGP-NEXT:    v_sub_i32_e64 v6, s[6:7], v1, v4
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
 ; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s[4:5]
 ; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v3
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v2, v5, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v4
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = udiv <2 x i32> %num, <i32 4096, i32 4096>
@@ -345,14 +342,13 @@ define i32 @v_udiv_i32_oddk_denom(i32 %num) {
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
-; CHECK-NEXT:    v_mov_b32_e32 v1, 0xffed2705
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s6
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
-; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v1, v2, v1
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, 0x4996c7d8
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0xffed2705
+; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v1
+; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; CHECK-NEXT:    v_mul_lo_u32 v2, v1, s6
 ; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
@@ -417,44 +413,42 @@ define <2 x i32> @v_udiv_v2i32_oddk_denom(<2 x i32> %num) {
 ; CGP-LABEL: v_udiv_v2i32_oddk_denom:
 ; CGP:       ; %bb.0:
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    s_mov_b32 s4, 0x12d8fb
-; CGP-NEXT:    v_mov_b32_e32 v2, 0x12d8fb
-; CGP-NEXT:    s_mov_b32 s5, 0xffed2705
-; CGP-NEXT:    v_cvt_f32_u32_e32 v3, s4
-; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
+; CGP-NEXT:    s_mov_b32 s8, 0x12d8fb
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, 0x4996c7d8
+; CGP-NEXT:    s_mov_b32 s4, 0xffed2705
+; CGP-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, 0x4996c7d8
+; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
 ; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
-; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; CGP-NEXT:    v_mul_lo_u32 v5, s5, v3
-; CGP-NEXT:    v_mul_lo_u32 v6, s5, v4
-; CGP-NEXT:    v_mul_hi_u32 v5, v3, v5
+; CGP-NEXT:    v_mul_lo_u32 v5, s4, v2
+; CGP-NEXT:    v_mul_lo_u32 v6, s4, v4
+; CGP-NEXT:    v_mul_hi_u32 v5, v2, v5
 ; CGP-NEXT:    v_mul_hi_u32 v6, v4, v6
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
 ; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
+; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
-; CGP-NEXT:    v_mul_lo_u32 v5, v3, s4
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v3
-; CGP-NEXT:    v_mul_lo_u32 v7, v4, v2
+; CGP-NEXT:    v_mul_lo_u32 v5, v2, s8
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v2
+; CGP-NEXT:    v_mul_lo_u32 v7, v4, s8
 ; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
-; CGP-NEXT:    v_subrev_i32_e64 v5, s[4:5], s4, v0
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v2
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; CGP-NEXT:    v_subrev_i32_e64 v5, s[4:5], s8, v0
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
 ; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v8, s[4:5]
-; CGP-NEXT:    v_sub_i32_e64 v6, s[6:7], v1, v2
+; CGP-NEXT:    v_sub_i32_e64 v6, s[6:7], v1, v3
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
 ; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s[4:5]
 ; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v3, v5, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v2, v5, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v6, vcc
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = udiv <2 x i32> %num, <i32 1235195, i32 1235195>

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
index 56d2311f50aa8..738cd237eb017 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
@@ -230,14 +230,13 @@ define i32 @v_urem_i32_oddk_denom(i32 %num) {
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_mov_b32 s4, 0x12d8fb
-; CHECK-NEXT:    v_mov_b32_e32 v1, 0xffed2705
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s4
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
-; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v1, v2, v1
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, 0x4996c7d8
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0xffed2705
+; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v1
+; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; CHECK-NEXT:    v_mul_lo_u32 v1, v1, s4
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
@@ -290,33 +289,31 @@ define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) {
 ; CGP:       ; %bb.0:
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CGP-NEXT:    s_mov_b32 s4, 0x12d8fb
-; CGP-NEXT:    v_mov_b32_e32 v2, 0x12d8fb
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, 0x4996c7d8
 ; CGP-NEXT:    s_mov_b32 s5, 0xffed2705
-; CGP-NEXT:    v_cvt_f32_u32_e32 v3, s4
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
-; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
-; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; CGP-NEXT:    v_mul_lo_u32 v4, s5, v3
-; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v0, v3
-; CGP-NEXT:    v_mul_hi_u32 v3, v1, v3
-; CGP-NEXT:    v_mul_lo_u32 v4, v4, s4
-; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
-; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
-; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v1, v2
+; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
+; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CGP-NEXT:    v_mul_lo_u32 v3, s5, v2
+; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
+; CGP-NEXT:    v_mul_hi_u32 v3, v0, v2
+; CGP-NEXT:    v_mul_hi_u32 v2, v1, v2
+; CGP-NEXT:    v_mul_lo_u32 v3, v3, s4
+; CGP-NEXT:    v_mul_lo_u32 v2, v2, s4
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
+; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
+; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v1
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v0
-; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v1, v2
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
+; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, 0x12d8fb, v1
 ; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = urem <2 x i32> %num, <i32 1235195, i32 1235195>
   ret <2 x i32> %result


        

