[llvm] f742a28 - AMDGPU/GlobalISel: Custom lower 32-bit G_SDIV/G_SREM

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 17 12:10:01 PST 2020


Author: Matt Arsenault
Date: 2020-02-17T15:09:51-05:00
New Revision: f742a28ae3e3895280717b16216363e866517cbf

URL: https://github.com/llvm/llvm-project/commit/f742a28ae3e3895280717b16216363e866517cbf
DIFF: https://github.com/llvm/llvm-project/commit/f742a28ae3e3895280717b16216363e866517cbf.diff

LOG: AMDGPU/GlobalISel: Custom lower 32-bit G_SDIV/G_SREM
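
Signed 32-bit division and remainder are reduced to the existing unsigned
32-bit expansion: the sign of each operand is taken with an arithmetic shift
by 31, the operands are made non-negative with an add/xor pair, the unsigned
expansion runs on the absolute values, and the sign is applied to the result
with an xor/sub pair (the quotient is negative iff the operand signs differ;
the remainder takes the sign of the dividend). As a rough scalar model of
that identity (an illustration only, not code from this patch and not the
emitted MIR; sdiv32_model is a hypothetical name):

  #include <cstdint>

  // Hypothetical sketch of the sdiv sign fixup around an unsigned divide.
  int32_t sdiv32_model(int32_t LHS, int32_t RHS) {
    uint32_t LSign = LHS < 0 ? ~0u : 0u;            // the value G_ASHR x, 31 produces
    uint32_t RSign = RHS < 0 ? ~0u : 0u;
    uint32_t UL = ((uint32_t)LHS + LSign) ^ LSign;  // |LHS|, with wrap-around
    uint32_t UR = ((uint32_t)RHS + RSign) ^ RSign;  // |RHS|
    uint32_t Q = UL / UR;                           // stands in for the unsigned expansion
    uint32_t QSign = LSign ^ RSign;                 // negative iff the signs differ
    return (int32_t)((Q ^ QSign) - QSign);          // conditional negate
  }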

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index e49cb9f1f911..53ce268d5e4a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1354,6 +1354,9 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
   case TargetOpcode::G_UDIV:
   case TargetOpcode::G_UREM:
     return legalizeUDIV_UREM(MI, MRI, B);
+  case TargetOpcode::G_SDIV:
+  case TargetOpcode::G_SREM:
+    return legalizeSDIV_SREM(MI, MRI, B);
   case TargetOpcode::G_ATOMIC_CMPXCHG:
     return legalizeAtomicCmpXChg(MI, MRI, B);
   case TargetOpcode::G_FLOG:
@@ -2329,19 +2332,14 @@ static Register buildDivRCP(MachineIRBuilder &B, Register Src) {
   return B.buildFPTOUI(S32, Mul).getReg(0);
 }
 
-bool AMDGPULegalizerInfo::legalizeUDIV_UREM32(MachineInstr &MI,
-                                              MachineRegisterInfo &MRI,
-                                              MachineIRBuilder &B) const {
-  B.setInstr(MI);
-  bool IsRem = MI.getOpcode() == AMDGPU::G_UREM;
-
+void AMDGPULegalizerInfo::legalizeUDIV_UREM32Impl(MachineIRBuilder &B,
+                                                  Register DstReg,
+                                                  Register Num,
+                                                  Register Den,
+                                                  bool IsRem) const {
   const LLT S1 = LLT::scalar(1);
   const LLT S32 = LLT::scalar(32);
 
-  Register DstReg = MI.getOperand(0).getReg();
-  Register Num = MI.getOperand(1).getReg();
-  Register Den = MI.getOperand(2).getReg();
-
   // RCP =  URECIP(Den) = 2^32 / Den + e
   // e is rounding error.
   auto RCP = buildDivRCP(B, Den);
@@ -2422,7 +2420,17 @@ bool AMDGPULegalizerInfo::legalizeUDIV_UREM32(MachineInstr &MI,
   } else {
     B.buildSelect(DstReg, Remainder_GE_Zero, Div, Quotient_S_One);
   }
+}
 
+bool AMDGPULegalizerInfo::legalizeUDIV_UREM32(MachineInstr &MI,
+                                              MachineRegisterInfo &MRI,
+                                              MachineIRBuilder &B) const {
+  B.setInstr(MI);
+  const bool IsRem = MI.getOpcode() == AMDGPU::G_UREM;
+  Register DstReg = MI.getOperand(0).getReg();
+  Register Num = MI.getOperand(1).getReg();
+  Register Den = MI.getOperand(2).getReg();
+  legalizeUDIV_UREM32Impl(B, DstReg, Num, Den, IsRem);
   MI.eraseFromParent();
   return true;
 }
@@ -2435,6 +2443,52 @@ bool AMDGPULegalizerInfo::legalizeUDIV_UREM(MachineInstr &MI,
   return false;
 }
 
+bool AMDGPULegalizerInfo::legalizeSDIV_SREM32(MachineInstr &MI,
+                                              MachineRegisterInfo &MRI,
+                                              MachineIRBuilder &B) const {
+  B.setInstr(MI);
+  const LLT S32 = LLT::scalar(32);
+
+  const bool IsRem = MI.getOpcode() == AMDGPU::G_SREM;
+  Register DstReg = MI.getOperand(0).getReg();
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+
+  auto ThirtyOne = B.buildConstant(S32, 31);
+  auto LHSign = B.buildAShr(S32, LHS, ThirtyOne);
+  auto RHSign = B.buildAShr(S32, RHS, ThirtyOne);
+
+  LHS = B.buildAdd(S32, LHS, LHSign).getReg(0);
+  RHS = B.buildAdd(S32, RHS, RHSign).getReg(0);
+
+  LHS = B.buildXor(S32, LHS, LHSign).getReg(0);
+  RHS = B.buildXor(S32, RHS, RHSign).getReg(0);
+
+  Register UDivRem = MRI.createGenericVirtualRegister(S32);
+  legalizeUDIV_UREM32Impl(B, UDivRem, LHS, RHS, IsRem);
+
+  if (IsRem) {
+    auto RSign = LHSign; // Remainder sign is the same as LHS
+    UDivRem = B.buildXor(S32, UDivRem, RSign).getReg(0);
+    B.buildSub(DstReg, UDivRem, RSign);
+  } else {
+    auto DSign = B.buildXor(S32, LHSign, RHSign);
+    UDivRem = B.buildXor(S32, UDivRem, DSign).getReg(0);
+    B.buildSub(DstReg, UDivRem, DSign);
+  }
+
+  MI.eraseFromParent();
+  return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeSDIV_SREM(MachineInstr &MI,
+                                            MachineRegisterInfo &MRI,
+                                            MachineIRBuilder &B) const {
+  if (MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32))
+    return legalizeSDIV_SREM32(MI, MRI, B);
+  return false;
+}
+
 bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
                                                  MachineRegisterInfo &MRI,
                                                  MachineIRBuilder &B) const {

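For the remainder path only the dividend's sign is folded back into the
result, matching the IsRem branch of legalizeSDIV_SREM32 above. A matching
scalar sketch (again just an illustration under the same assumptions, not
code from the patch):

  #include <cstdint>

  // Hypothetical sketch of the srem sign fixup around an unsigned remainder.
  int32_t srem32_model(int32_t LHS, int32_t RHS) {
    uint32_t LSign = LHS < 0 ? ~0u : 0u;
    uint32_t RSign = RHS < 0 ? ~0u : 0u;
    uint32_t UL = ((uint32_t)LHS + LSign) ^ LSign;
    uint32_t UR = ((uint32_t)RHS + RSign) ^ RSign;
    uint32_t R = UL % UR;                    // stands in for the unsigned expansion
    return (int32_t)((R ^ LSign) - LSign);   // remainder takes the dividend's sign
  }
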
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 777c34cbd190..c16fb9c74a91 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -99,9 +99,18 @@ class AMDGPULegalizerInfo : public LegalizerInfo {
 
   bool legalizeUDIV_UREM(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &B) const;
+
+  void legalizeUDIV_UREM32Impl(MachineIRBuilder &B,
+                               Register DstReg, Register Num, Register Den,
+                               bool IsRem) const;
   bool legalizeUDIV_UREM32(MachineInstr &MI, MachineRegisterInfo &MRI,
                            MachineIRBuilder &B) const;
 
+  bool legalizeSDIV_SREM32(MachineInstr &MI, MachineRegisterInfo &MRI,
+                           MachineIRBuilder &B) const;
+  bool legalizeSDIV_SREM(MachineInstr &MI, MachineRegisterInfo &MRI,
+                         MachineIRBuilder &B) const;
+
   bool legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI,
                     MachineIRBuilder &B) const;
   bool legalizeFDIV16(MachineInstr &MI, MachineRegisterInfo &MRI,

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
index 8530fa5fb70b..8690d04ef023 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
@@ -12,18 +12,123 @@ body: |
     ; GFX6-LABEL: name: test_sdiv_s32
     ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX6: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[COPY]], [[COPY1]]
-    ; GFX6: $vgpr0 = COPY [[SDIV]](s32)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32)
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]]
+    ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
+    ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+    ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX6: $vgpr0 = COPY [[SUB4]](s32)
     ; GFX8-LABEL: name: test_sdiv_s32
     ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX8: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[COPY]], [[COPY1]]
-    ; GFX8: $vgpr0 = COPY [[SDIV]](s32)
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32)
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]]
+    ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
+    ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+    ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX8: $vgpr0 = COPY [[SUB4]](s32)
     ; GFX9-LABEL: name: test_sdiv_s32
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[COPY]], [[COPY1]]
-    ; GFX9: $vgpr0 = COPY [[SDIV]](s32)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32)
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]]
+    ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
+    ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+    ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX9: $vgpr0 = COPY [[SUB4]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = G_SDIV %0, %1
@@ -41,27 +146,228 @@ body: |
     ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
     ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
     ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; GFX6: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[UV]], [[UV2]]
-    ; GFX6: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[UV1]], [[UV3]]
-    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SDIV]](s32), [[SDIV1]](s32)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32)
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]]
+    ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
+    ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+    ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32)
+    ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
+    ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
+    ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
+    ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+    ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32)
+    ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]]
+    ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]]
+    ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]]
+    ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]]
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]]
+    ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
+    ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
+    ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
+    ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
+    ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
+    ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]]
+    ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]]
+    ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
+    ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]]
+    ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]]
+    ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
+    ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C3]]
+    ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C3]]
+    ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[UMULH5]], [[ADD7]]
+    ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]]
+    ; GFX6: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]]
+    ; GFX6: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]]
+    ; GFX6: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
+    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32)
     ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX8-LABEL: name: test_sdiv_v2s32
     ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
     ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
     ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
     ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; GFX8: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[UV]], [[UV2]]
-    ; GFX8: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[UV1]], [[UV3]]
-    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SDIV]](s32), [[SDIV1]](s32)
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32)
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]]
+    ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
+    ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+    ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32)
+    ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
+    ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
+    ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
+    ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+    ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32)
+    ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]]
+    ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]]
+    ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]]
+    ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]]
+    ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]]
+    ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
+    ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
+    ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
+    ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
+    ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
+    ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]]
+    ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]]
+    ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
+    ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]]
+    ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]]
+    ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
+    ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C3]]
+    ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C3]]
+    ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[UMULH5]], [[ADD7]]
+    ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]]
+    ; GFX8: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]]
+    ; GFX8: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]]
+    ; GFX8: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
+    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32)
     ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_sdiv_v2s32
     ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
     ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
     ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
     ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; GFX9: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[UV]], [[UV2]]
-    ; GFX9: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[UV1]], [[UV3]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SDIV]](s32), [[SDIV1]](s32)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32)
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]]
+    ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
+    ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+    ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32)
+    ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
+    ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
+    ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
+    ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+    ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32)
+    ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]]
+    ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]]
+    ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]]
+    ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]]
+    ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]]
+    ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
+    ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
+    ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
+    ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
+    ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
+    ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]]
+    ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]]
+    ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
+    ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]]
+    ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]]
+    ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
+    ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C3]]
+    ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C3]]
+    ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[UMULH5]], [[ADD7]]
+    ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]]
+    ; GFX9: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]]
+    ; GFX9: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]]
+    ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32)
     ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
@@ -144,36 +450,138 @@ body: |
     ; GFX6-LABEL: name: test_sdiv_s16
     ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
-    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16)
-    ; GFX6: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]]
-    ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV]](s32)
-    ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s16)
-    ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
+    ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+    ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX6: $vgpr0 = COPY [[COPY4]](s32)
     ; GFX8-LABEL: name: test_sdiv_s16
     ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
-    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16)
-    ; GFX8: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]]
-    ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV]](s32)
-    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s16)
-    ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
+    ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+    ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX8: $vgpr0 = COPY [[COPY4]](s32)
     ; GFX9-LABEL: name: test_sdiv_s16
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
-    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16)
-    ; GFX9: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV]](s32)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s16)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
+    ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+    ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY4]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0
@@ -192,39 +600,280 @@ body: |
     ; GFX6-LABEL: name: test_sdiv_v2s16
     ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
     ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-    ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
-    ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
-    ; GFX6: [[SDIV:%[0-9]+]]:_(s16) = G_SDIV [[UV]], [[UV2]]
-    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16)
-    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16)
-    ; GFX6: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]]
-    ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV1]](s32)
-    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SDIV]](s16), [[TRUNC]](s16)
-    ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32)
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]]
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]]
+    ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+    ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX6: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16
+    ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX6: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16
+    ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
+    ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32)
+    ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
+    ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
+    ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
+    ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+    ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32)
+    ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]]
+    ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]]
+    ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]]
+    ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]]
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]]
+    ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
+    ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
+    ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
+    ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
+    ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
+    ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]]
+    ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]]
+    ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
+    ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]]
+    ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]]
+    ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
+    ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]]
+    ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]]
+    ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[UMULH5]], [[ADD7]]
+    ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]]
+    ; GFX6: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]]
+    ; GFX6: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]]
+    ; GFX6: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
+    ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
+    ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32)
+    ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
+    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]]
+    ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
     ; GFX8-LABEL: name: test_sdiv_v2s16
     ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
     ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-    ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
-    ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
-    ; GFX8: [[SDIV:%[0-9]+]]:_(s16) = G_SDIV [[UV]], [[UV2]]
-    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16)
-    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16)
-    ; GFX8: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]]
-    ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV1]](s32)
-    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SDIV]](s16), [[TRUNC]](s16)
-    ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32)
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]]
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]]
+    ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+    ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX8: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16
+    ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX8: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16
+    ; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
+    ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32)
+    ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
+    ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
+    ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
+    ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+    ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32)
+    ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]]
+    ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]]
+    ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]]
+    ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]]
+    ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]]
+    ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
+    ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
+    ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
+    ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
+    ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
+    ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]]
+    ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]]
+    ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
+    ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]]
+    ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]]
+    ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
+    ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]]
+    ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]]
+    ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[UMULH5]], [[ADD7]]
+    ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]]
+    ; GFX8: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]]
+    ; GFX8: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]]
+    ; GFX8: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
+    ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
+    ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32)
+    ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]]
+    ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]]
+    ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
     ; GFX9-LABEL: name: test_sdiv_v2s16
     ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-    ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
-    ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
-    ; GFX9: [[SDIV:%[0-9]+]]:_(s16) = G_SDIV [[UV]], [[UV2]]
-    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16)
-    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16)
-    ; GFX9: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]]
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV1]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SDIV]](s16), [[TRUNC]](s16)
-    ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]]
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]]
+    ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+    ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16
+    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16
+    ; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
+    ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
+    ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
+    ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
+    ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
+    ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+    ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32)
+    ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]]
+    ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]]
+    ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]]
+    ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]]
+    ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]]
+    ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
+    ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
+    ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
+    ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
+    ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
+    ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]]
+    ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]]
+    ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]]
+    ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]]
+    ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]]
+    ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
+    ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]]
+    ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]]
+    ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[UMULH5]], [[ADD7]]
+    ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]]
+    ; GFX9: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]]
+    ; GFX9: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]]
+    ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
+    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+    ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<2 x s16>) = G_SDIV %0, %1
@@ -240,36 +889,138 @@ body: |
     ; GFX6-LABEL: name: test_sdiv_s7
     ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX6: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32)
-    ; GFX6: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32)
-    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s7)
-    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s7)
-    ; GFX6: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]]
-    ; GFX6: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[SDIV]](s32)
-    ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7)
-    ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 7
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
+    ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+    ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX6: $vgpr0 = COPY [[COPY4]](s32)
     ; GFX8-LABEL: name: test_sdiv_s7
     ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32)
-    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32)
-    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s7)
-    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s7)
-    ; GFX8: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]]
-    ; GFX8: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[SDIV]](s32)
-    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7)
-    ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 7
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
+    ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+    ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX8: $vgpr0 = COPY [[COPY4]](s32)
     ; GFX9-LABEL: name: test_sdiv_s7
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32)
-    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s7)
-    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s7)
-    ; GFX9: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[SDIV]](s32)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 7
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
+    ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+    ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY4]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s7) = G_TRUNC %0
@@ -288,36 +1039,138 @@ body: |
     ; GFX6-LABEL: name: test_sdiv_s17
     ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX6: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[COPY]](s32)
-    ; GFX6: [[TRUNC1:%[0-9]+]]:_(s17) = G_TRUNC [[COPY1]](s32)
-    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s17)
-    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s17)
-    ; GFX6: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]]
-    ; GFX6: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[SDIV]](s32)
-    ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s17)
-    ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 17
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
+    ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+    ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX6: $vgpr0 = COPY [[COPY4]](s32)
     ; GFX8-LABEL: name: test_sdiv_s17
     ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX8: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[COPY]](s32)
-    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s17) = G_TRUNC [[COPY1]](s32)
-    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s17)
-    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s17)
-    ; GFX8: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]]
-    ; GFX8: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[SDIV]](s32)
-    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s17)
-    ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 17
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
+    ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+    ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX8: $vgpr0 = COPY [[COPY4]](s32)
     ; GFX9-LABEL: name: test_sdiv_s17
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[COPY]](s32)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s17) = G_TRUNC [[COPY1]](s32)
-    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s17)
-    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s17)
-    ; GFX9: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[SDIV]](s32)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s17)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 17
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]]
+    ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD3]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]]
+    ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY4]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s17) = G_TRUNC %0

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir
index 7b56c3e2ece0..431126348001 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir
@@ -12,18 +12,117 @@ body: |
     ; GFX6-LABEL: name: test_srem_s32
     ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX6: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[COPY]], [[COPY1]]
-    ; GFX6: $vgpr0 = COPY [[SREM]](s32)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]]
+    ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
+    ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX6: $vgpr0 = COPY [[SUB4]](s32)
     ; GFX8-LABEL: name: test_srem_s32
     ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX8: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[COPY]], [[COPY1]]
-    ; GFX8: $vgpr0 = COPY [[SREM]](s32)
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]]
+    ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
+    ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX8: $vgpr0 = COPY [[SUB4]](s32)
     ; GFX9-LABEL: name: test_srem_s32
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[COPY]], [[COPY1]]
-    ; GFX9: $vgpr0 = COPY [[SREM]](s32)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]]
+    ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
+    ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX9: $vgpr0 = COPY [[SUB4]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = G_SREM %0, %1
@@ -41,27 +140,219 @@ body: |
     ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
     ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
     ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; GFX6: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[UV]], [[UV2]]
-    ; GFX6: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[UV1]], [[UV3]]
-    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SREM]](s32), [[SREM1]](s32)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]]
+    ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
+    ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
+    ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
+    ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
+    ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+    ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32)
+    ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]]
+    ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]]
+    ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]]
+    ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]]
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]]
+    ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
+    ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
+    ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
+    ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
+    ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
+    ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]]
+    ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]]
+    ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
+    ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]]
+    ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]]
+    ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
+    ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]]
+    ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]]
+    ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]]
+    ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]]
+    ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]]
+    ; GFX6: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]]
+    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32)
     ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX8-LABEL: name: test_srem_v2s32
     ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
     ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
     ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
     ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; GFX8: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[UV]], [[UV2]]
-    ; GFX8: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[UV1]], [[UV3]]
-    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SREM]](s32), [[SREM1]](s32)
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]]
+    ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
+    ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
+    ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
+    ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
+    ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+    ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32)
+    ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]]
+    ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]]
+    ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]]
+    ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]]
+    ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]]
+    ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
+    ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
+    ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
+    ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
+    ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
+    ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]]
+    ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]]
+    ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
+    ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]]
+    ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]]
+    ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
+    ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]]
+    ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]]
+    ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]]
+    ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]]
+    ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]]
+    ; GFX8: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]]
+    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32)
     ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     ; GFX9-LABEL: name: test_srem_v2s32
     ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
     ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
     ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
     ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; GFX9: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[UV]], [[UV2]]
-    ; GFX9: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[UV1]], [[UV3]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SREM]](s32), [[SREM1]](s32)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]]
+    ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
+    ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
+    ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
+    ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
+    ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+    ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32)
+    ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]]
+    ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]]
+    ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]]
+    ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]]
+    ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]]
+    ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
+    ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
+    ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
+    ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
+    ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
+    ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]]
+    ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]]
+    ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
+    ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]]
+    ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]]
+    ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
+    ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]]
+    ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]]
+    ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]]
+    ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]]
+    ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]]
+    ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32)
     ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
@@ -144,36 +435,138 @@ body: |
     ; GFX6-LABEL: name: test_srem_s16
     ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
-    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16)
-    ; GFX6: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]]
-    ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SREM]](s32)
-    ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC2]](s16)
-    ; GFX6: $vgpr0 = COPY [[ZEXT]](s32)
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
+    ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX6: $vgpr0 = COPY [[AND1]](s32)
     ; GFX8-LABEL: name: test_srem_s16
     ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
-    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16)
-    ; GFX8: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]]
-    ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SREM]](s32)
-    ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC2]](s16)
-    ; GFX8: $vgpr0 = COPY [[ZEXT]](s32)
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
+    ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX8: $vgpr0 = COPY [[AND1]](s32)
     ; GFX9-LABEL: name: test_srem_s16
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
-    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16)
-    ; GFX9: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SREM]](s32)
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC2]](s16)
-    ; GFX9: $vgpr0 = COPY [[ZEXT]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
+    ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; GFX9: $vgpr0 = COPY [[AND1]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0
@@ -192,39 +585,271 @@ body: |
     ; GFX6-LABEL: name: test_srem_v2s16
     ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
     ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-    ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
-    ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
-    ; GFX6: [[SREM:%[0-9]+]]:_(s16) = G_SREM [[UV]], [[UV2]]
-    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16)
-    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16)
-    ; GFX6: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]]
-    ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SREM1]](s32)
-    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SREM]](s16), [[TRUNC]](s16)
-    ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]]
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
+    ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX6: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16
+    ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX6: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16
+    ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
+    ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
+    ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
+    ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
+    ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
+    ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+    ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32)
+    ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]]
+    ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]]
+    ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]]
+    ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]]
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]]
+    ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
+    ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
+    ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
+    ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
+    ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
+    ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]]
+    ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]]
+    ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
+    ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]]
+    ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]]
+    ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
+    ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]]
+    ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]]
+    ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]]
+    ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]]
+    ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]]
+    ; GFX6: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]]
+    ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
+    ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32)
+    ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]]
+    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]]
+    ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
     ; GFX8-LABEL: name: test_srem_v2s16
     ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
     ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-    ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
-    ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
-    ; GFX8: [[SREM:%[0-9]+]]:_(s16) = G_SREM [[UV]], [[UV2]]
-    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16)
-    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16)
-    ; GFX8: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]]
-    ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SREM1]](s32)
-    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SREM]](s16), [[TRUNC]](s16)
-    ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]]
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
+    ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX8: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16
+    ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX8: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16
+    ; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
+    ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
+    ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
+    ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
+    ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
+    ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+    ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32)
+    ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]]
+    ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]]
+    ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]]
+    ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]]
+    ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]]
+    ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
+    ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
+    ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
+    ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
+    ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
+    ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]]
+    ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]]
+    ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
+    ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]]
+    ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]]
+    ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
+    ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]]
+    ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]]
+    ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]]
+    ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]]
+    ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]]
+    ; GFX8: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]]
+    ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]]
+    ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32)
+    ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]]
+    ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]]
+    ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
     ; GFX9-LABEL: name: test_srem_v2s16
     ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-    ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
-    ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
-    ; GFX9: [[SREM:%[0-9]+]]:_(s16) = G_SREM [[UV]], [[UV2]]
-    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16)
-    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16)
-    ; GFX9: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]]
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SREM1]](s32)
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SREM]](s16), [[TRUNC]](s16)
-    ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+    ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]]
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
+    ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; GFX9: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16
+    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; GFX9: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16
+    ; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
+    ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
+    ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
+    ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
+    ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
+    ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]]
+    ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32)
+    ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32)
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]]
+    ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32)
+    ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]]
+    ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]]
+    ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]]
+    ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]]
+    ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]]
+    ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]]
+    ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]]
+    ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]]
+    ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]]
+    ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]]
+    ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]]
+    ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]]
+    ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]]
+    ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]]
+    ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]]
+    ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]]
+    ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]]
+    ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]]
+    ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]]
+    ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]]
+    ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]]
+    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+    ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<2 x s16>) = G_SREM %0, %1
@@ -240,36 +865,132 @@ body: |
     ; GFX6-LABEL: name: test_srem_s7
     ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX6: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32)
-    ; GFX6: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32)
-    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s7)
-    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s7)
-    ; GFX6: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]]
-    ; GFX6: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[SREM]](s32)
-    ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7)
-    ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 7
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
+    ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX6: $vgpr0 = COPY [[COPY4]](s32)
     ; GFX8-LABEL: name: test_srem_s7
     ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32)
-    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32)
-    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s7)
-    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s7)
-    ; GFX8: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]]
-    ; GFX8: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[SREM]](s32)
-    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7)
-    ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 7
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
+    ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX8: $vgpr0 = COPY [[COPY4]](s32)
     ; GFX9-LABEL: name: test_srem_s7
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32)
-    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s7)
-    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s7)
-    ; GFX9: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[SREM]](s32)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 7
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
+    ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY4]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s7) = G_TRUNC %0
@@ -288,36 +1009,132 @@ body: |
     ; GFX6-LABEL: name: test_srem_s17
     ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX6: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[COPY]](s32)
-    ; GFX6: [[TRUNC1:%[0-9]+]]:_(s17) = G_TRUNC [[COPY1]](s32)
-    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s17)
-    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s17)
-    ; GFX6: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]]
-    ; GFX6: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[SREM]](s32)
-    ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s17)
-    ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 17
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
+    ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX6: $vgpr0 = COPY [[COPY4]](s32)
     ; GFX8-LABEL: name: test_srem_s17
     ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX8: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[COPY]](s32)
-    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s17) = G_TRUNC [[COPY1]](s32)
-    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s17)
-    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s17)
-    ; GFX8: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]]
-    ; GFX8: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[SREM]](s32)
-    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s17)
-    ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 17
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
+    ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX8: $vgpr0 = COPY [[COPY4]](s32)
     ; GFX9-LABEL: name: test_srem_s17
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[COPY]](s32)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s17) = G_TRUNC [[COPY1]](s32)
-    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s17)
-    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s17)
-    ; GFX9: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[SREM]](s32)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s17)
-    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 17
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
+    ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
+    ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]]
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]]
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]]
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]]
+    ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]]
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]]
+    ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]]
+    ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]]
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]]
+    ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY4]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s17) = G_TRUNC %0

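For reference, the pattern repeated through the checks above is easier to follow in scalar form: compute each operand's sign mask with an arithmetic shift by 31, take absolute values with an add/xor pair, divide as unsigned, then restore the sign of the result with xor/sub. The C sketch below is only a model of that wrapper, not part of the patch: the helper names are made up, the plain '/' and '%' stand in for the reciprocal-based unsigned expansion the tests actually check, and it assumes arithmetic right shift of signed values, as on the targets here.

#include <stdint.h>

/* Model of the sign-handling wrapper around the unsigned expansion.
   sn/sd are all-ones masks when the corresponding operand is negative. */
static int32_t sdiv32_model(int32_t num, int32_t den) {
  uint32_t sn = (uint32_t)(num >> 31);     /* sign mask of the numerator */
  uint32_t sd = (uint32_t)(den >> 31);     /* sign mask of the denominator */
  uint32_t un = ((uint32_t)num + sn) ^ sn; /* |num| */
  uint32_t ud = ((uint32_t)den + sd) ^ sd; /* |den| */
  uint32_t q  = un / ud;                   /* stands in for the rcp-based udiv */
  uint32_t s  = sn ^ sd;                   /* quotient negative iff signs differ */
  return (int32_t)((q ^ s) - s);
}

static int32_t srem32_model(int32_t num, int32_t den) {
  uint32_t sn = (uint32_t)(num >> 31);
  uint32_t sd = (uint32_t)(den >> 31);
  uint32_t un = ((uint32_t)num + sn) ^ sn;
  uint32_t ud = ((uint32_t)den + sd) ^ sd;
  uint32_t r  = un % ud;                   /* stands in for the rcp-based urem */
  return (int32_t)((r ^ sn) - sn);         /* remainder takes the numerator's sign */
}

The narrower s7, s16, and s17 cases above reduce to the same sequence once the inputs have been sign-extended in register to 32 bits, which is what the G_SEXT_INREG operations in the checks do.
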
diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
new file mode 100644
index 000000000000..ecd9cd5c547d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
@@ -0,0 +1,1030 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,CGP %s
+
+; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
+
+define i32 @v_sdiv_i32(i32 %num, i32 %den) {
+; GISEL-LABEL: v_sdiv_i32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GISEL-NEXT:    v_mul_lo_u32 v4, v3, v1
+; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v1
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v4, v4, v3
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v3, v3, v0
+; GISEL-NEXT:    v_mul_lo_u32 v4, v3, v1
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
+; GISEL-NEXT:    v_subrev_i32_e32 v6, vcc, 1, v3
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v0, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v1
+; GISEL-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v5, v3, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v1, v2, v2
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v1
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_sdiv_i32:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; CGP-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
+; CGP-NEXT:    v_xor_b32_e32 v4, v2, v3
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v3
+; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
+; CGP-NEXT:    v_mul_lo_u32 v3, 0, v1
+; CGP-NEXT:    v_mul_lo_u32 v5, 0, v0
+; CGP-NEXT:    v_rcp_f32_e32 v2, v2
+; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v2
+; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CGP-NEXT:    v_mul_lo_u32 v6, v2, v1
+; CGP-NEXT:    v_mul_lo_u32 v7, v2, 0
+; CGP-NEXT:    v_mul_hi_u32 v8, v2, v1
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v6
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v9, vcc
+; CGP-NEXT:    v_mul_lo_u32 v6, v3, 0
+; CGP-NEXT:    v_mul_hi_u32 v3, v3, v2
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CGP-NEXT:    v_add_i32_e64 v3, s[4:5], v6, v3
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v2, v3
+; CGP-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; CGP-NEXT:    v_mul_lo_u32 v3, v2, 0
+; CGP-NEXT:    v_mul_hi_u32 v2, v2, v0
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CGP-NEXT:    v_mul_lo_u32 v3, v2, v1
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
+; CGP-NEXT:    v_subrev_i32_e32 v6, vcc, 1, v2
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v0, v3
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v1
+; CGP-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v2, v5, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %result = sdiv i32 %num, %den
+  ret i32 %result
+}
+
+; FIXME: This is a workaround for not handling the uniform VGPR case.
+declare i32 @llvm.amdgcn.readfirstlane(i32)
+
+define amdgpu_ps i32 @s_sdiv_i32(i32 inreg %num, i32 inreg %den) {
+; GISEL-LABEL: s_sdiv_i32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_ashr_i32 s2, s0, 31
+; GISEL-NEXT:    s_add_i32 s0, s0, s2
+; GISEL-NEXT:    s_add_i32 s1, s1, s2
+; GISEL-NEXT:    s_xor_b32 s3, s0, s2
+; GISEL-NEXT:    s_xor_b32 s4, s1, s2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, s4
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; GISEL-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GISEL-NEXT:    v_mul_lo_u32 v1, v0, s4
+; GISEL-NEXT:    v_mul_hi_u32 v2, v0, s4
+; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v1, v1, v0
+; GISEL-NEXT:    v_add_i32_e64 v2, s[0:1], v0, v1
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[0:1], v0, v1
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v0, v0, s3
+; GISEL-NEXT:    v_mul_lo_u32 v1, v0, s4
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
+; GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, 1, v0
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, s3, v1
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, s3, v1
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[0:1], s4, v4
+; GISEL-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[0:1]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
+; GISEL-NEXT:    s_xor_b32 s0, s2, s2
+; GISEL-NEXT:    v_xor_b32_e32 v0, s0, v0
+; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, s0, v0
+; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; GISEL-NEXT:    ; return to shader part epilog
+;
+; CGP-LABEL: s_sdiv_i32:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_ashr_i32 s2, s0, 31
+; CGP-NEXT:    s_ashr_i32 s3, s1, 31
+; CGP-NEXT:    s_xor_b32 s5, s2, s3
+; CGP-NEXT:    s_add_i32 s0, s0, s2
+; CGP-NEXT:    s_add_i32 s1, s1, s3
+; CGP-NEXT:    s_xor_b32 s2, s0, s2
+; CGP-NEXT:    s_xor_b32 s4, s1, s3
+; CGP-NEXT:    v_cvt_f32_u32_e32 v0, s4
+; CGP-NEXT:    s_bfe_u64 s[0:1], s[4:5], 0x200000
+; CGP-NEXT:    s_bfe_u64 s[6:7], s[2:3], 0x200000
+; CGP-NEXT:    v_rcp_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_lo_u32 v1, 0, s0
+; CGP-NEXT:    v_mul_lo_u32 v2, 0, s6
+; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
+; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_lo_u32 v3, v0, s0
+; CGP-NEXT:    v_mul_lo_u32 v4, v0, s1
+; CGP-NEXT:    v_mul_hi_u32 v5, v0, s0
+; CGP-NEXT:    v_mul_lo_u32 v6, 0, v0
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v3
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
+; CGP-NEXT:    v_mul_lo_u32 v3, v1, 0
+; CGP-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CGP-NEXT:    v_add_i32_e64 v3, s[0:1], v6, v3
+; CGP-NEXT:    v_add_i32_e64 v1, s[0:1], v3, v1
+; CGP-NEXT:    v_add_i32_e64 v3, s[0:1], v0, v1
+; CGP-NEXT:    v_sub_i32_e64 v0, s[0:1], v0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; CGP-NEXT:    v_mul_lo_u32 v1, v0, s7
+; CGP-NEXT:    v_mul_hi_u32 v0, v0, s6
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
+; CGP-NEXT:    v_mul_lo_u32 v1, v0, s4
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
+; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, 1, v0
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, s2, v1
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, s2, v1
+; CGP-NEXT:    v_cmp_le_u32_e64 s[0:1], s4, v4
+; CGP-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
+; CGP-NEXT:    v_xor_b32_e32 v0, s5, v0
+; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s5, v0
+; CGP-NEXT:    v_readfirstlane_b32 s0, v0
+; CGP-NEXT:    ; return to shader part epilog
+  %result = sdiv i32 %num, %den
+  %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result)
+  ret i32 %readlane
+}
+
+define <2 x i32> @v_sdiv_v2i32(<2 x i32> %num, <2 x i32> %den) {
+; GISEL-LABEL: v_sdiv_v2i32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
+; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; GISEL-NEXT:    v_xor_b32_e32 v6, v4, v4
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; GISEL-NEXT:    v_xor_b32_e32 v7, v5, v5
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v4
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v5
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v3
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v2
+; GISEL-NEXT:    v_mul_hi_u32 v9, v4, v2
+; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v3
+; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v3
+; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, 0, v8
+; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, v10, v13, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v4
+; GISEL-NEXT:    v_mul_hi_u32 v9, v9, v5
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v4, v8
+; GISEL-NEXT:    v_sub_i32_e64 v4, s[6:7], v4, v8
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v5, v9
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v4, v4, v0
+; GISEL-NEXT:    v_mul_hi_u32 v5, v5, v1
+; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v2
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
+; GISEL-NEXT:    v_subrev_i32_e32 v10, vcc, 1, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v3
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, 1, v5
+; GISEL-NEXT:    v_subrev_i32_e32 v13, vcc, 1, v5
+; GISEL-NEXT:    v_sub_i32_e32 v14, vcc, v0, v8
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v8
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v1, v11
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v11
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v14, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v0, v3
+; GISEL-NEXT:    s_and_b64 s[6:7], s[6:7], vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v9, v4, s[6:7]
+; GISEL-NEXT:    s_and_b64 s[6:7], s[8:9], s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v12, v5, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v13, v1, s[4:5]
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v6
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v7
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_sdiv_v2i32:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
+; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
+; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
+; CGP-NEXT:    v_xor_b32_e32 v8, v4, v5
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CGP-NEXT:    v_xor_b32_e32 v9, v6, v7
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
+; CGP-NEXT:    v_xor_b32_e32 v2, v2, v5
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
+; CGP-NEXT:    v_mul_lo_u32 v5, 0, v2
+; CGP-NEXT:    v_mul_lo_u32 v6, 0, v0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
+; CGP-NEXT:    v_mul_lo_u32 v10, 0, v3
+; CGP-NEXT:    v_mul_lo_u32 v11, 0, v1
+; CGP-NEXT:    v_rcp_f32_e32 v4, v4
+; CGP-NEXT:    v_rcp_f32_e32 v7, v7
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f800000, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_mul_lo_u32 v12, v4, v2
+; CGP-NEXT:    v_mul_lo_u32 v13, v4, 0
+; CGP-NEXT:    v_mul_hi_u32 v14, v4, v2
+; CGP-NEXT:    v_mul_lo_u32 v15, v7, v3
+; CGP-NEXT:    v_mul_lo_u32 v16, v7, 0
+; CGP-NEXT:    v_mul_hi_u32 v17, v7, v3
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
+; CGP-NEXT:    v_sub_i32_e32 v18, vcc, 0, v12
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v16
+; CGP-NEXT:    v_sub_i32_e32 v19, vcc, 0, v15
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v14
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v17
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v12, v18, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v10, v15, v19, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v12, v5, 0
+; CGP-NEXT:    v_mul_hi_u32 v5, v5, v4
+; CGP-NEXT:    v_mul_lo_u32 v14, v10, 0
+; CGP-NEXT:    v_mul_hi_u32 v10, v10, v7
+; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v13, v12
+; CGP-NEXT:    v_add_i32_e64 v13, s[6:7], v16, v14
+; CGP-NEXT:    v_add_i32_e64 v5, s[6:7], v12, v5
+; CGP-NEXT:    v_add_i32_e64 v10, s[6:7], v13, v10
+; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v4, v5
+; CGP-NEXT:    v_sub_i32_e64 v4, s[6:7], v4, v5
+; CGP-NEXT:    v_add_i32_e64 v5, s[6:7], v7, v10
+; CGP-NEXT:    v_sub_i32_e64 v7, s[6:7], v7, v10
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v5, v7, v5, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v7, v4, 0
+; CGP-NEXT:    v_mul_hi_u32 v4, v4, v0
+; CGP-NEXT:    v_mul_lo_u32 v10, v5, 0
+; CGP-NEXT:    v_mul_hi_u32 v5, v5, v1
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v11, v10
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; CGP-NEXT:    v_mul_lo_u32 v6, v4, v2
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
+; CGP-NEXT:    v_subrev_i32_e32 v10, vcc, 1, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, v5, v3
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, 1, v5
+; CGP-NEXT:    v_subrev_i32_e32 v13, vcc, 1, v5
+; CGP-NEXT:    v_sub_i32_e32 v14, vcc, v0, v6
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v6
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v1, v11
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v11
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v14, v2
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[8:9], v0, v3
+; CGP-NEXT:    s_and_b64 s[6:7], s[6:7], vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v4, v7, s[6:7]
+; CGP-NEXT:    s_and_b64 s[6:7], s[8:9], s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v5, v12, s[6:7]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v13, v1, s[4:5]
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v8
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v9
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %result = sdiv <2 x i32> %num, %den
+  ret <2 x i32> %result
+}
+
+define i32 @v_sdiv_i32_pow2k_denom(i32 %num) {
+; CHECK-LABEL: v_sdiv_i32_pow2k_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0x1000, v1
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
+; CHECK-NEXT:    v_xor_b32_e32 v2, v2, v1
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v2
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT:    v_mul_lo_u32 v4, v3, v2
+; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v2
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v4, v4, v3
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
+; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v0
+; CHECK-NEXT:    v_mul_lo_u32 v4, v3, v2
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
+; CHECK-NEXT:    v_subrev_i32_e32 v6, vcc, 1, v3
+; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v0, v4
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v2
+; CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v5, v3, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v1
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %result = sdiv i32 %num, 4096
+  ret i32 %result
+}
+
+define <2 x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) {
+; CHECK-LABEL: v_sdiv_v2i32_pow2k_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_movk_i32 s4, 0x1000
+; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0x1000
+; CHECK-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, s4, v2
+; CHECK-NEXT:    v_xor_b32_e32 v6, v2, v2
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
+; CHECK-NEXT:    v_xor_b32_e32 v7, v4, v4
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CHECK-NEXT:    v_xor_b32_e32 v2, v5, v2
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v4
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v2
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v3
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; CHECK-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
+; CHECK-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v5
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CHECK-NEXT:    v_mul_lo_u32 v8, v4, v2
+; CHECK-NEXT:    v_mul_hi_u32 v9, v4, v2
+; CHECK-NEXT:    v_mul_lo_u32 v10, v5, v3
+; CHECK-NEXT:    v_mul_hi_u32 v11, v5, v3
+; CHECK-NEXT:    v_sub_i32_e32 v12, vcc, 0, v8
+; CHECK-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
+; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, v10, v13, s[4:5]
+; CHECK-NEXT:    v_mul_hi_u32 v8, v8, v4
+; CHECK-NEXT:    v_mul_hi_u32 v9, v9, v5
+; CHECK-NEXT:    v_add_i32_e64 v10, s[6:7], v4, v8
+; CHECK-NEXT:    v_sub_i32_e64 v4, s[6:7], v4, v8
+; CHECK-NEXT:    v_add_i32_e64 v8, s[6:7], v5, v9
+; CHECK-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v9
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[4:5]
+; CHECK-NEXT:    v_mul_hi_u32 v4, v4, v0
+; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v1
+; CHECK-NEXT:    v_mul_lo_u32 v8, v4, v2
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
+; CHECK-NEXT:    v_subrev_i32_e32 v10, vcc, 1, v4
+; CHECK-NEXT:    v_mul_lo_u32 v11, v5, v3
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, 1, v5
+; CHECK-NEXT:    v_subrev_i32_e32 v13, vcc, 1, v5
+; CHECK-NEXT:    v_sub_i32_e32 v14, vcc, v0, v8
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v8
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v1, v11
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v11
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[6:7], v14, v2
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[8:9], v0, v3
+; CHECK-NEXT:    s_and_b64 s[6:7], s[6:7], vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v9, v4, s[6:7]
+; CHECK-NEXT:    s_and_b64 s[6:7], s[8:9], s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, v12, v5, s[6:7]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, v13, v1, s[4:5]
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v6
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v7
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %result = sdiv <2 x i32> %num, <i32 4096, i32 4096>
+  ret <2 x i32> %result
+}
+
+define i32 @v_sdiv_i32_oddk_denom(i32 %num) {
+; CHECK-LABEL: v_sdiv_i32_oddk_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0x12d8fb, v1
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
+; CHECK-NEXT:    v_xor_b32_e32 v2, v2, v1
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v2
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT:    v_mul_lo_u32 v4, v3, v2
+; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v2
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v4, v4, v3
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
+; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v0
+; CHECK-NEXT:    v_mul_lo_u32 v4, v3, v2
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
+; CHECK-NEXT:    v_subrev_i32_e32 v6, vcc, 1, v3
+; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v0, v4
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v2
+; CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v5, v3, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v1
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %result = sdiv i32 %num, 1235195
+  ret i32 %result
+}
+
+define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) {
+; CHECK-LABEL: v_sdiv_v2i32_oddk_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 s4, 0x12d8fb
+; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
+; CHECK-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, s4, v2
+; CHECK-NEXT:    v_xor_b32_e32 v6, v2, v2
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
+; CHECK-NEXT:    v_xor_b32_e32 v7, v4, v4
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CHECK-NEXT:    v_xor_b32_e32 v2, v5, v2
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v4
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v2
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v3
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; CHECK-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
+; CHECK-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v5
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CHECK-NEXT:    v_mul_lo_u32 v8, v4, v2
+; CHECK-NEXT:    v_mul_hi_u32 v9, v4, v2
+; CHECK-NEXT:    v_mul_lo_u32 v10, v5, v3
+; CHECK-NEXT:    v_mul_hi_u32 v11, v5, v3
+; CHECK-NEXT:    v_sub_i32_e32 v12, vcc, 0, v8
+; CHECK-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
+; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, v10, v13, s[4:5]
+; CHECK-NEXT:    v_mul_hi_u32 v8, v8, v4
+; CHECK-NEXT:    v_mul_hi_u32 v9, v9, v5
+; CHECK-NEXT:    v_add_i32_e64 v10, s[6:7], v4, v8
+; CHECK-NEXT:    v_sub_i32_e64 v4, s[6:7], v4, v8
+; CHECK-NEXT:    v_add_i32_e64 v8, s[6:7], v5, v9
+; CHECK-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v9
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[4:5]
+; CHECK-NEXT:    v_mul_hi_u32 v4, v4, v0
+; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v1
+; CHECK-NEXT:    v_mul_lo_u32 v8, v4, v2
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
+; CHECK-NEXT:    v_subrev_i32_e32 v10, vcc, 1, v4
+; CHECK-NEXT:    v_mul_lo_u32 v11, v5, v3
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, 1, v5
+; CHECK-NEXT:    v_subrev_i32_e32 v13, vcc, 1, v5
+; CHECK-NEXT:    v_sub_i32_e32 v14, vcc, v0, v8
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v8
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v1, v11
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v11
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[6:7], v14, v2
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[8:9], v0, v3
+; CHECK-NEXT:    s_and_b64 s[6:7], s[6:7], vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v9, v4, s[6:7]
+; CHECK-NEXT:    s_and_b64 s[6:7], s[8:9], s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, v12, v5, s[6:7]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, v13, v1, s[4:5]
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v6
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v7
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %result = sdiv <2 x i32> %num, <i32 1235195, i32 1235195>
+  ret <2 x i32> %result
+}
+
+define i32 @v_sdiv_i32_pow2_shl_denom(i32 %x, i32 %y) {
+; CHECK-LABEL: v_sdiv_i32_pow2_shl_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_lshl_b32_e32 v1, 0x1000, v1
+; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v2
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v1
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT:    v_mul_lo_u32 v4, v3, v1
+; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v1
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v4, v4, v3
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
+; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v0
+; CHECK-NEXT:    v_mul_lo_u32 v4, v3, v1
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
+; CHECK-NEXT:    v_subrev_i32_e32 v6, vcc, 1, v3
+; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v0, v4
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v1
+; CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v5, v3, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v1, v2, v2
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %shl.y = shl i32 4096, %y
+  %r = sdiv i32 %x, %shl.y
+  ret i32 %r
+}
+
+define <2 x i32> @v_sdiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
+; GISEL-LABEL: v_sdiv_v2i32_pow2_shl_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_movk_i32 s4, 0x1000
+; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
+; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; GISEL-NEXT:    v_lshl_b32_e32 v2, s4, v2
+; GISEL-NEXT:    v_lshl_b32_e32 v3, s4, v3
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_xor_b32_e32 v6, v4, v4
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT:    v_xor_b32_e32 v7, v5, v5
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v4
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v5
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v3
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v2
+; GISEL-NEXT:    v_mul_hi_u32 v9, v4, v2
+; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v3
+; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v3
+; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, 0, v8
+; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, v10, v13, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v4
+; GISEL-NEXT:    v_mul_hi_u32 v9, v9, v5
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v4, v8
+; GISEL-NEXT:    v_sub_i32_e64 v4, s[6:7], v4, v8
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v5, v9
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v4, v4, v0
+; GISEL-NEXT:    v_mul_hi_u32 v5, v5, v1
+; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v2
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
+; GISEL-NEXT:    v_subrev_i32_e32 v10, vcc, 1, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v3
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, 1, v5
+; GISEL-NEXT:    v_subrev_i32_e32 v13, vcc, 1, v5
+; GISEL-NEXT:    v_sub_i32_e32 v14, vcc, v0, v8
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v8
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v1, v11
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v11
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v14, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v0, v3
+; GISEL-NEXT:    s_and_b64 s[6:7], s[6:7], vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v9, v4, s[6:7]
+; GISEL-NEXT:    s_and_b64 s[6:7], s[8:9], s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v12, v5, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v13, v1, s[4:5]
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v6
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v7
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_sdiv_v2i32_pow2_shl_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_movk_i32 s4, 0x1000
+; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
+; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; CGP-NEXT:    v_lshl_b32_e32 v2, s4, v2
+; CGP-NEXT:    v_lshl_b32_e32 v3, s4, v3
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
+; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
+; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
+; CGP-NEXT:    v_xor_b32_e32 v4, v4, v6
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; CGP-NEXT:    v_mul_lo_u32 v8, 0, v0
+; CGP-NEXT:    v_xor_b32_e32 v5, v5, v7
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; CGP-NEXT:    v_mul_lo_u32 v9, 0, v1
+; CGP-NEXT:    v_xor_b32_e32 v2, v2, v6
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
+; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v2
+; CGP-NEXT:    v_mul_lo_u32 v7, 0, v2
+; CGP-NEXT:    v_cvt_f32_u32_e32 v10, v3
+; CGP-NEXT:    v_mul_lo_u32 v11, 0, v3
+; CGP-NEXT:    v_rcp_f32_e32 v6, v6
+; CGP-NEXT:    v_rcp_f32_e32 v10, v10
+; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v6
+; CGP-NEXT:    v_mul_f32_e32 v10, 0x4f800000, v10
+; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT:    v_cvt_u32_f32_e32 v10, v10
+; CGP-NEXT:    v_mul_lo_u32 v12, v6, v2
+; CGP-NEXT:    v_mul_lo_u32 v13, v6, 0
+; CGP-NEXT:    v_mul_hi_u32 v14, v6, v2
+; CGP-NEXT:    v_mul_lo_u32 v15, v10, v3
+; CGP-NEXT:    v_mul_lo_u32 v16, v10, 0
+; CGP-NEXT:    v_mul_hi_u32 v17, v10, v3
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
+; CGP-NEXT:    v_sub_i32_e32 v18, vcc, 0, v12
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
+; CGP-NEXT:    v_sub_i32_e32 v19, vcc, 0, v15
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v14
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v17
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v7, v12, v18, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v11, v15, v19, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v12, v7, 0
+; CGP-NEXT:    v_mul_hi_u32 v7, v7, v6
+; CGP-NEXT:    v_mul_lo_u32 v14, v11, 0
+; CGP-NEXT:    v_mul_hi_u32 v11, v11, v10
+; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v13, v12
+; CGP-NEXT:    v_add_i32_e64 v13, s[6:7], v16, v14
+; CGP-NEXT:    v_add_i32_e64 v7, s[6:7], v12, v7
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v13, v11
+; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v6, v7
+; CGP-NEXT:    v_sub_i32_e64 v6, s[6:7], v6, v7
+; CGP-NEXT:    v_add_i32_e64 v7, s[6:7], v10, v11
+; CGP-NEXT:    v_sub_i32_e64 v10, s[6:7], v10, v11
+; CGP-NEXT:    v_cndmask_b32_e32 v6, v6, v12, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v7, v10, v7, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v10, v6, 0
+; CGP-NEXT:    v_mul_hi_u32 v6, v6, v0
+; CGP-NEXT:    v_mul_lo_u32 v11, v7, 0
+; CGP-NEXT:    v_mul_hi_u32 v7, v7, v1
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
+; CGP-NEXT:    v_mul_lo_u32 v8, v6, v2
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v6
+; CGP-NEXT:    v_subrev_i32_e32 v10, vcc, 1, v6
+; CGP-NEXT:    v_mul_lo_u32 v11, v7, v3
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, 1, v7
+; CGP-NEXT:    v_subrev_i32_e32 v13, vcc, 1, v7
+; CGP-NEXT:    v_sub_i32_e32 v14, vcc, v0, v8
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v8
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v1, v11
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v11
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v14, v2
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[8:9], v0, v3
+; CGP-NEXT:    s_and_b64 s[6:7], s[6:7], vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v6, v9, s[6:7]
+; CGP-NEXT:    s_and_b64 s[6:7], s[8:9], s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v7, v12, s[6:7]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v13, v1, s[4:5]
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
+  %r = sdiv <2 x i32> %x, %shl.y
+  ret <2 x i32> %r
+}
+
+define i32 @v_sdiv_i32_24bit(i32 %num, i32 %den) {
+; GISEL-LABEL: v_sdiv_i32_24bit:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
+; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
+; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
+; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GISEL-NEXT:    v_mul_lo_u32 v4, v3, v1
+; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v1
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v4, v4, v3
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v3, v3, v0
+; GISEL-NEXT:    v_mul_lo_u32 v4, v3, v1
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
+; GISEL-NEXT:    v_subrev_i32_e32 v6, vcc, 1, v3
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v0, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v1
+; GISEL-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v5, v3, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v1, v2, v2
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v1
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_sdiv_i32_24bit:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_mov_b32 s4, 0xffffff
+; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
+; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
+; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
+; CGP-NEXT:    v_mul_lo_u32 v3, 0, v1
+; CGP-NEXT:    v_mul_lo_u32 v4, 0, v0
+; CGP-NEXT:    v_rcp_f32_e32 v2, v2
+; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v2
+; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CGP-NEXT:    v_mul_lo_u32 v5, v2, v1
+; CGP-NEXT:    v_mul_lo_u32 v6, v2, 0
+; CGP-NEXT:    v_mul_hi_u32 v7, v2, v1
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
+; CGP-NEXT:    v_sub_i32_e32 v8, vcc, 0, v5
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v5, v8, vcc
+; CGP-NEXT:    v_mul_lo_u32 v5, v3, 0
+; CGP-NEXT:    v_mul_hi_u32 v3, v3, v2
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v6, v5
+; CGP-NEXT:    v_add_i32_e64 v3, s[4:5], v5, v3
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v2, v3
+; CGP-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; CGP-NEXT:    v_mul_lo_u32 v3, v2, 0
+; CGP-NEXT:    v_mul_hi_u32 v2, v2, v0
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CGP-NEXT:    v_mul_lo_u32 v3, v2, v1
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
+; CGP-NEXT:    v_subrev_i32_e32 v5, vcc, 1, v2
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v0, v3
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v1
+; CGP-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v2, v4, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %num.mask = and i32 %num, 16777215
+  %den.mask = and i32 %den, 16777215
+  %result = sdiv i32 %num.mask, %den.mask
+  ret i32 %result
+}
+
+define <2 x i32> @v_sdiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
+; GISEL-LABEL: v_sdiv_v2i32_24bit:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
+; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
+; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
+; GISEL-NEXT:    v_and_b32_e32 v2, s4, v2
+; GISEL-NEXT:    v_and_b32_e32 v3, s4, v3
+; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
+; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; GISEL-NEXT:    v_xor_b32_e32 v6, v4, v4
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; GISEL-NEXT:    v_xor_b32_e32 v7, v5, v5
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v4
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v5
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v3
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v2
+; GISEL-NEXT:    v_mul_hi_u32 v9, v4, v2
+; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v3
+; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v3
+; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, 0, v8
+; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, v10, v13, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v4
+; GISEL-NEXT:    v_mul_hi_u32 v9, v9, v5
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v4, v8
+; GISEL-NEXT:    v_sub_i32_e64 v4, s[6:7], v4, v8
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v5, v9
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v4, v4, v0
+; GISEL-NEXT:    v_mul_hi_u32 v5, v5, v1
+; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v2
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
+; GISEL-NEXT:    v_subrev_i32_e32 v10, vcc, 1, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v3
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, 1, v5
+; GISEL-NEXT:    v_subrev_i32_e32 v13, vcc, 1, v5
+; GISEL-NEXT:    v_sub_i32_e32 v14, vcc, v0, v8
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v8
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v1, v11
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v11
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v14, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v0, v3
+; GISEL-NEXT:    s_and_b64 s[6:7], s[6:7], vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v9, v4, s[6:7]
+; GISEL-NEXT:    s_and_b64 s[6:7], s[8:9], s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v12, v5, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v13, v1, s[4:5]
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v6
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v7
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_sdiv_v2i32_24bit:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_mov_b32 s4, 0xffffff
+; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
+; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
+; CGP-NEXT:    v_and_b32_e32 v2, s4, v2
+; CGP-NEXT:    v_and_b32_e32 v3, s4, v3
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
+; CGP-NEXT:    v_mul_lo_u32 v5, 0, v2
+; CGP-NEXT:    v_mul_lo_u32 v6, 0, v0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
+; CGP-NEXT:    v_mul_lo_u32 v8, 0, v3
+; CGP-NEXT:    v_mul_lo_u32 v9, 0, v1
+; CGP-NEXT:    v_rcp_f32_e32 v4, v4
+; CGP-NEXT:    v_rcp_f32_e32 v7, v7
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f800000, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, v4, v2
+; CGP-NEXT:    v_mul_lo_u32 v11, v4, 0
+; CGP-NEXT:    v_mul_hi_u32 v12, v4, v2
+; CGP-NEXT:    v_mul_lo_u32 v13, v7, v3
+; CGP-NEXT:    v_mul_lo_u32 v14, v7, 0
+; CGP-NEXT:    v_mul_hi_u32 v15, v7, v3
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
+; CGP-NEXT:    v_sub_i32_e32 v16, vcc, 0, v10
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v14
+; CGP-NEXT:    v_sub_i32_e32 v17, vcc, 0, v13
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v15
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v10, v16, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v8, v13, v17, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v10, v5, 0
+; CGP-NEXT:    v_mul_hi_u32 v5, v5, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, v8, 0
+; CGP-NEXT:    v_mul_hi_u32 v8, v8, v7
+; CGP-NEXT:    v_add_i32_e64 v10, s[6:7], v11, v10
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v14, v12
+; CGP-NEXT:    v_add_i32_e64 v5, s[6:7], v10, v5
+; CGP-NEXT:    v_add_i32_e64 v8, s[6:7], v11, v8
+; CGP-NEXT:    v_add_i32_e64 v10, s[6:7], v4, v5
+; CGP-NEXT:    v_sub_i32_e64 v4, s[6:7], v4, v5
+; CGP-NEXT:    v_add_i32_e64 v5, s[6:7], v7, v8
+; CGP-NEXT:    v_sub_i32_e64 v7, s[6:7], v7, v8
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v5, v7, v5, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v7, v4, 0
+; CGP-NEXT:    v_mul_hi_u32 v4, v4, v0
+; CGP-NEXT:    v_mul_lo_u32 v8, v5, 0
+; CGP-NEXT:    v_mul_hi_u32 v5, v5, v1
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v8
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; CGP-NEXT:    v_mul_lo_u32 v6, v4, v2
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
+; CGP-NEXT:    v_subrev_i32_e32 v8, vcc, 1, v4
+; CGP-NEXT:    v_mul_lo_u32 v9, v5, v3
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v5
+; CGP-NEXT:    v_subrev_i32_e32 v11, vcc, 1, v5
+; CGP-NEXT:    v_sub_i32_e32 v12, vcc, v0, v6
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v6
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v1, v9
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v9
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v12, v2
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[8:9], v0, v3
+; CGP-NEXT:    s_and_b64 s[6:7], s[6:7], vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v4, v7, s[6:7]
+; CGP-NEXT:    s_and_b64 s[6:7], s[8:9], s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v5, v10, s[6:7]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v11, v1, s[4:5]
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
+  %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
+  %result = sdiv <2 x i32> %num.mask, %den.mask
+  ret <2 x i32> %result
+}

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
new file mode 100644
index 000000000000..62939b5e59e7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
@@ -0,0 +1,1008 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,CGP %s
+
+; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
+
+define i32 @v_srem_i32(i32 %num, i32 %den) {
+; GISEL-LABEL: v_srem_i32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GISEL-NEXT:    v_mul_lo_u32 v4, v3, v1
+; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v1
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v4, v4, v3
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v3, v3, v0
+; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v1
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v1
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v4, v1
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[6:7], v4, v1
+; GISEL-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_srem_i32:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; CGP-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v3
+; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v1
+; CGP-NEXT:    v_mul_lo_u32 v4, 0, v1
+; CGP-NEXT:    v_mul_lo_u32 v5, 0, v0
+; CGP-NEXT:    v_rcp_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_lo_u32 v6, v3, v1
+; CGP-NEXT:    v_mul_lo_u32 v7, v3, 0
+; CGP-NEXT:    v_mul_hi_u32 v8, v3, v1
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v6
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v6, v9, vcc
+; CGP-NEXT:    v_mul_lo_u32 v6, v4, 0
+; CGP-NEXT:    v_mul_hi_u32 v4, v4, v3
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v6, v4
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v3, v4
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; CGP-NEXT:    v_mul_lo_u32 v4, v3, 0
+; CGP-NEXT:    v_mul_hi_u32 v3, v3, v0
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
+; CGP-NEXT:    v_mul_lo_u32 v3, v3, v1
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v1
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v4, v1
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
+; CGP-NEXT:    v_sub_i32_e64 v0, s[6:7], v4, v1
+; CGP-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %result = srem i32 %num, %den
+  ret i32 %result
+}
+
+; FIXME: This is a workaround for not handling the uniform VGPR case.
+declare i32 @llvm.amdgcn.readfirstlane(i32)
+
+define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) {
+; GISEL-LABEL: s_srem_i32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_ashr_i32 s4, s0, 31
+; GISEL-NEXT:    s_add_i32 s0, s0, s4
+; GISEL-NEXT:    s_add_i32 s1, s1, s4
+; GISEL-NEXT:    s_xor_b32 s2, s0, s4
+; GISEL-NEXT:    s_xor_b32 s3, s1, s4
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, s3
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; GISEL-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GISEL-NEXT:    v_mul_lo_u32 v1, v0, s3
+; GISEL-NEXT:    v_mul_hi_u32 v2, v0, s3
+; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v1, v1, v0
+; GISEL-NEXT:    v_add_i32_e64 v2, s[0:1], v0, v1
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[0:1], v0, v1
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v0, v0, s2
+; GISEL-NEXT:    v_mul_lo_u32 v0, v0, s3
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, s2, v0
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s3, v1
+; GISEL-NEXT:    v_add_i32_e64 v2, s[0:1], s3, v1
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[0:1], s2, v0
+; GISEL-NEXT:    v_subrev_i32_e64 v0, s[2:3], s3, v1
+; GISEL-NEXT:    s_and_b64 vcc, vcc, s[0:1]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[0:1]
+; GISEL-NEXT:    v_xor_b32_e32 v0, s4, v0
+; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, s4, v0
+; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; GISEL-NEXT:    ; return to shader part epilog
+;
+; CGP-LABEL: s_srem_i32:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_ashr_i32 s5, s0, 31
+; CGP-NEXT:    s_ashr_i32 s3, s1, 31
+; CGP-NEXT:    s_add_i32 s0, s0, s5
+; CGP-NEXT:    s_add_i32 s1, s1, s3
+; CGP-NEXT:    s_xor_b32 s2, s0, s5
+; CGP-NEXT:    s_xor_b32 s4, s1, s3
+; CGP-NEXT:    v_cvt_f32_u32_e32 v0, s4
+; CGP-NEXT:    s_bfe_u64 s[0:1], s[4:5], 0x200000
+; CGP-NEXT:    s_bfe_u64 s[6:7], s[2:3], 0x200000
+; CGP-NEXT:    v_rcp_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_lo_u32 v1, 0, s0
+; CGP-NEXT:    v_mul_lo_u32 v2, 0, s6
+; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
+; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_lo_u32 v3, v0, s0
+; CGP-NEXT:    v_mul_lo_u32 v4, v0, s1
+; CGP-NEXT:    v_mul_hi_u32 v5, v0, s0
+; CGP-NEXT:    v_mul_lo_u32 v6, 0, v0
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v3
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
+; CGP-NEXT:    v_mul_lo_u32 v3, v1, 0
+; CGP-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CGP-NEXT:    v_add_i32_e64 v3, s[0:1], v6, v3
+; CGP-NEXT:    v_add_i32_e64 v1, s[0:1], v3, v1
+; CGP-NEXT:    v_add_i32_e64 v3, s[0:1], v0, v1
+; CGP-NEXT:    v_sub_i32_e64 v0, s[0:1], v0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; CGP-NEXT:    v_mul_lo_u32 v1, v0, s7
+; CGP-NEXT:    v_mul_hi_u32 v0, v0, s6
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
+; CGP-NEXT:    v_mul_lo_u32 v0, v0, s4
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, s2, v0
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
+; CGP-NEXT:    v_add_i32_e64 v2, s[0:1], s4, v1
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[0:1], s2, v0
+; CGP-NEXT:    v_subrev_i32_e64 v0, s[2:3], s4, v1
+; CGP-NEXT:    s_and_b64 vcc, vcc, s[0:1]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[0:1]
+; CGP-NEXT:    v_xor_b32_e32 v0, s5, v0
+; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s5, v0
+; CGP-NEXT:    v_readfirstlane_b32 s0, v0
+; CGP-NEXT:    ; return to shader part epilog
+  %result = srem i32 %num, %den
+  %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result)
+  ret i32 %readlane
+}
+
+define <2 x i32> @v_srem_v2i32(<2 x i32> %num, <2 x i32> %den) {
+; GISEL-LABEL: v_srem_v2i32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
+; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v4
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v5
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v7, v3
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v6
+; GISEL-NEXT:    v_mul_f32_e32 v7, 0x4f800000, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v2
+; GISEL-NEXT:    v_mul_hi_u32 v9, v6, v2
+; GISEL-NEXT:    v_mul_lo_u32 v10, v7, v3
+; GISEL-NEXT:    v_mul_hi_u32 v11, v7, v3
+; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, 0, v8
+; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, v10, v13, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v6
+; GISEL-NEXT:    v_mul_hi_u32 v9, v9, v7
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v6, v8
+; GISEL-NEXT:    v_sub_i32_e64 v6, s[6:7], v6, v8
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v7, v9
+; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v7, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v6, v6, v0
+; GISEL-NEXT:    v_mul_hi_u32 v7, v7, v1
+; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v2
+; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v0, v6
+; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v1, v7
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v2
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v8, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v6
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[6:7], v8, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v9, v3
+; GISEL-NEXT:    v_add_i32_e64 v2, s[8:9], v9, v3
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v1, v7
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[10:11], v9, v3
+; GISEL-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; GISEL-NEXT:    s_and_b64 vcc, s[6:7], s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[8:9]
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_srem_v2i32:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
+; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
+; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
+; CGP-NEXT:    v_xor_b32_e32 v2, v2, v5
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v2
+; CGP-NEXT:    v_mul_lo_u32 v7, 0, v2
+; CGP-NEXT:    v_mul_lo_u32 v8, 0, v0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v9, v3
+; CGP-NEXT:    v_mul_lo_u32 v10, 0, v3
+; CGP-NEXT:    v_mul_lo_u32 v11, 0, v1
+; CGP-NEXT:    v_rcp_f32_e32 v5, v5
+; CGP-NEXT:    v_rcp_f32_e32 v9, v9
+; CGP-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v5
+; CGP-NEXT:    v_mul_f32_e32 v9, 0x4f800000, v9
+; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v9, v9
+; CGP-NEXT:    v_mul_lo_u32 v12, v5, v2
+; CGP-NEXT:    v_mul_lo_u32 v13, v5, 0
+; CGP-NEXT:    v_mul_hi_u32 v14, v5, v2
+; CGP-NEXT:    v_mul_lo_u32 v15, v9, v3
+; CGP-NEXT:    v_mul_lo_u32 v16, v9, 0
+; CGP-NEXT:    v_mul_hi_u32 v17, v9, v3
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
+; CGP-NEXT:    v_sub_i32_e32 v18, vcc, 0, v12
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v16
+; CGP-NEXT:    v_sub_i32_e32 v19, vcc, 0, v15
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v14
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v17
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v7, v12, v18, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v10, v15, v19, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v12, v7, 0
+; CGP-NEXT:    v_mul_hi_u32 v7, v7, v5
+; CGP-NEXT:    v_mul_lo_u32 v14, v10, 0
+; CGP-NEXT:    v_mul_hi_u32 v10, v10, v9
+; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v13, v12
+; CGP-NEXT:    v_add_i32_e64 v13, s[6:7], v16, v14
+; CGP-NEXT:    v_add_i32_e64 v7, s[6:7], v12, v7
+; CGP-NEXT:    v_add_i32_e64 v10, s[6:7], v13, v10
+; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v5, v7
+; CGP-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v7
+; CGP-NEXT:    v_add_i32_e64 v7, s[6:7], v9, v10
+; CGP-NEXT:    v_sub_i32_e64 v9, s[6:7], v9, v10
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v12, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v7, v9, v7, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v9, v5, 0
+; CGP-NEXT:    v_mul_hi_u32 v5, v5, v0
+; CGP-NEXT:    v_mul_lo_u32 v10, v7, 0
+; CGP-NEXT:    v_mul_hi_u32 v7, v7, v1
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v10
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
+; CGP-NEXT:    v_mul_lo_u32 v5, v5, v2
+; CGP-NEXT:    v_mul_lo_u32 v7, v7, v3
+; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v0, v5
+; CGP-NEXT:    v_sub_i32_e32 v9, vcc, v1, v7
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v2
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v8, v2
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v5
+; CGP-NEXT:    v_sub_i32_e64 v0, s[6:7], v8, v2
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v9, v3
+; CGP-NEXT:    v_add_i32_e64 v2, s[8:9], v9, v3
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[8:9], v1, v7
+; CGP-NEXT:    v_sub_i32_e64 v1, s[10:11], v9, v3
+; CGP-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; CGP-NEXT:    s_and_b64 vcc, s[6:7], s[8:9]
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[8:9]
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %result = srem <2 x i32> %num, %den
+  ret <2 x i32> %result
+}
+
+define i32 @v_srem_i32_pow2k_denom(i32 %num) {
+; CHECK-LABEL: v_srem_i32_pow2k_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0x1000, v1
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
+; CHECK-NEXT:    v_xor_b32_e32 v2, v2, v1
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v2
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT:    v_mul_lo_u32 v4, v3, v2
+; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v2
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v4, v4, v3
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
+; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v0
+; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v2
+; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v2
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v4, v2
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[6:7], v4, v2
+; CHECK-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %result = srem i32 %num, 4096
+  ret i32 %result
+}
+
+define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
+; CHECK-LABEL: v_srem_v2i32_pow2k_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_movk_i32 s4, 0x1000
+; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0x1000
+; CHECK-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, s4, v2
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CHECK-NEXT:    v_xor_b32_e32 v5, v5, v2
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v4
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v6, v5
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v7, v3
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; CHECK-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v6
+; CHECK-NEXT:    v_mul_f32_e32 v7, 0x4f800000, v7
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v5
+; CHECK-NEXT:    v_mul_hi_u32 v9, v6, v5
+; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v3
+; CHECK-NEXT:    v_mul_hi_u32 v11, v7, v3
+; CHECK-NEXT:    v_sub_i32_e32 v12, vcc, 0, v8
+; CHECK-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
+; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, v10, v13, s[4:5]
+; CHECK-NEXT:    v_mul_hi_u32 v8, v8, v6
+; CHECK-NEXT:    v_mul_hi_u32 v9, v9, v7
+; CHECK-NEXT:    v_add_i32_e64 v10, s[6:7], v6, v8
+; CHECK-NEXT:    v_sub_i32_e64 v6, s[6:7], v6, v8
+; CHECK-NEXT:    v_add_i32_e64 v8, s[6:7], v7, v9
+; CHECK-NEXT:    v_sub_i32_e64 v7, s[6:7], v7, v9
+; CHECK-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
+; CHECK-NEXT:    v_mul_hi_u32 v6, v6, v0
+; CHECK-NEXT:    v_mul_hi_u32 v7, v7, v1
+; CHECK-NEXT:    v_mul_lo_u32 v6, v6, v5
+; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v3
+; CHECK-NEXT:    v_sub_i32_e32 v8, vcc, v0, v6
+; CHECK-NEXT:    v_sub_i32_e32 v9, vcc, v1, v7
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v5
+; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v8, v5
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v6
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[6:7], v8, v5
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[6:7], v9, v3
+; CHECK-NEXT:    v_add_i32_e64 v5, s[8:9], v9, v3
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[8:9], v1, v7
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[10:11], v9, v3
+; CHECK-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; CHECK-NEXT:    s_and_b64 vcc, s[6:7], s[8:9]
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s[8:9]
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %result = srem <2 x i32> %num, <i32 4096, i32 4096>
+  ret <2 x i32> %result
+}
+
+define i32 @v_srem_i32_oddk_denom(i32 %num) {
+; CHECK-LABEL: v_srem_i32_oddk_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0x12d8fb, v1
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
+; CHECK-NEXT:    v_xor_b32_e32 v2, v2, v1
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v2
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT:    v_mul_lo_u32 v4, v3, v2
+; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v2
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v4, v4, v3
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
+; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v0
+; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v2
+; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v2
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v4, v2
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[6:7], v4, v2
+; CHECK-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %result = srem i32 %num, 1235195
+  ret i32 %result
+}
+
+define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
+; CHECK-LABEL: v_srem_v2i32_oddk_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 s4, 0x12d8fb
+; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
+; CHECK-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, s4, v2
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CHECK-NEXT:    v_xor_b32_e32 v5, v5, v2
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v4
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v6, v5
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v7, v3
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; CHECK-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v6
+; CHECK-NEXT:    v_mul_f32_e32 v7, 0x4f800000, v7
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v5
+; CHECK-NEXT:    v_mul_hi_u32 v9, v6, v5
+; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v3
+; CHECK-NEXT:    v_mul_hi_u32 v11, v7, v3
+; CHECK-NEXT:    v_sub_i32_e32 v12, vcc, 0, v8
+; CHECK-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
+; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, v10, v13, s[4:5]
+; CHECK-NEXT:    v_mul_hi_u32 v8, v8, v6
+; CHECK-NEXT:    v_mul_hi_u32 v9, v9, v7
+; CHECK-NEXT:    v_add_i32_e64 v10, s[6:7], v6, v8
+; CHECK-NEXT:    v_sub_i32_e64 v6, s[6:7], v6, v8
+; CHECK-NEXT:    v_add_i32_e64 v8, s[6:7], v7, v9
+; CHECK-NEXT:    v_sub_i32_e64 v7, s[6:7], v7, v9
+; CHECK-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
+; CHECK-NEXT:    v_mul_hi_u32 v6, v6, v0
+; CHECK-NEXT:    v_mul_hi_u32 v7, v7, v1
+; CHECK-NEXT:    v_mul_lo_u32 v6, v6, v5
+; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v3
+; CHECK-NEXT:    v_sub_i32_e32 v8, vcc, v0, v6
+; CHECK-NEXT:    v_sub_i32_e32 v9, vcc, v1, v7
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v5
+; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v8, v5
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v6
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[6:7], v8, v5
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[6:7], v9, v3
+; CHECK-NEXT:    v_add_i32_e64 v5, s[8:9], v9, v3
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[8:9], v1, v7
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[10:11], v9, v3
+; CHECK-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; CHECK-NEXT:    s_and_b64 vcc, s[6:7], s[8:9]
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s[8:9]
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %result = srem <2 x i32> %num, <i32 1235195, i32 1235195>
+  ret <2 x i32> %result
+}
+
+define i32 @v_srem_i32_pow2_shl_denom(i32 %x, i32 %y) {
+; CHECK-LABEL: v_srem_i32_pow2_shl_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_lshl_b32_e32 v1, 0x1000, v1
+; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v2
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v1
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT:    v_mul_lo_u32 v4, v3, v1
+; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v1
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v4, v4, v3
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
+; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v0
+; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v1
+; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v1
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v4, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[6:7], v4, v1
+; CHECK-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %shl.y = shl i32 4096, %y
+  %r = srem i32 %x, %shl.y
+  ret i32 %r
+}
+
+define <2 x i32> @v_srem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
+; GISEL-LABEL: v_srem_v2i32_pow2_shl_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_movk_i32 s4, 0x1000
+; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
+; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; GISEL-NEXT:    v_lshl_b32_e32 v2, s4, v2
+; GISEL-NEXT:    v_lshl_b32_e32 v3, s4, v3
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v4
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v5
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v7, v3
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v6
+; GISEL-NEXT:    v_mul_f32_e32 v7, 0x4f800000, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v2
+; GISEL-NEXT:    v_mul_hi_u32 v9, v6, v2
+; GISEL-NEXT:    v_mul_lo_u32 v10, v7, v3
+; GISEL-NEXT:    v_mul_hi_u32 v11, v7, v3
+; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, 0, v8
+; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, v10, v13, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v6
+; GISEL-NEXT:    v_mul_hi_u32 v9, v9, v7
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v6, v8
+; GISEL-NEXT:    v_sub_i32_e64 v6, s[6:7], v6, v8
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v7, v9
+; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v7, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v6, v6, v0
+; GISEL-NEXT:    v_mul_hi_u32 v7, v7, v1
+; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v2
+; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v0, v6
+; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v1, v7
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v2
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v8, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v6
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[6:7], v8, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v9, v3
+; GISEL-NEXT:    v_add_i32_e64 v2, s[8:9], v9, v3
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v1, v7
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[10:11], v9, v3
+; GISEL-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; GISEL-NEXT:    s_and_b64 vcc, s[6:7], s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[8:9]
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_srem_v2i32_pow2_shl_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_movk_i32 s4, 0x1000
+; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
+; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; CGP-NEXT:    v_lshl_b32_e32 v2, s4, v2
+; CGP-NEXT:    v_lshl_b32_e32 v3, s4, v3
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
+; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
+; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; CGP-NEXT:    v_mul_lo_u32 v8, 0, v0
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; CGP-NEXT:    v_mul_lo_u32 v9, 0, v1
+; CGP-NEXT:    v_xor_b32_e32 v2, v2, v6
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
+; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v2
+; CGP-NEXT:    v_mul_lo_u32 v7, 0, v2
+; CGP-NEXT:    v_cvt_f32_u32_e32 v10, v3
+; CGP-NEXT:    v_mul_lo_u32 v11, 0, v3
+; CGP-NEXT:    v_rcp_f32_e32 v6, v6
+; CGP-NEXT:    v_rcp_f32_e32 v10, v10
+; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v6
+; CGP-NEXT:    v_mul_f32_e32 v10, 0x4f800000, v10
+; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT:    v_cvt_u32_f32_e32 v10, v10
+; CGP-NEXT:    v_mul_lo_u32 v12, v6, v2
+; CGP-NEXT:    v_mul_lo_u32 v13, v6, 0
+; CGP-NEXT:    v_mul_hi_u32 v14, v6, v2
+; CGP-NEXT:    v_mul_lo_u32 v15, v10, v3
+; CGP-NEXT:    v_mul_lo_u32 v16, v10, 0
+; CGP-NEXT:    v_mul_hi_u32 v17, v10, v3
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
+; CGP-NEXT:    v_sub_i32_e32 v18, vcc, 0, v12
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
+; CGP-NEXT:    v_sub_i32_e32 v19, vcc, 0, v15
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v14
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v17
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v7, v12, v18, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v11, v15, v19, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v12, v7, 0
+; CGP-NEXT:    v_mul_hi_u32 v7, v7, v6
+; CGP-NEXT:    v_mul_lo_u32 v14, v11, 0
+; CGP-NEXT:    v_mul_hi_u32 v11, v11, v10
+; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v13, v12
+; CGP-NEXT:    v_add_i32_e64 v13, s[6:7], v16, v14
+; CGP-NEXT:    v_add_i32_e64 v7, s[6:7], v12, v7
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v13, v11
+; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v6, v7
+; CGP-NEXT:    v_sub_i32_e64 v6, s[6:7], v6, v7
+; CGP-NEXT:    v_add_i32_e64 v7, s[6:7], v10, v11
+; CGP-NEXT:    v_sub_i32_e64 v10, s[6:7], v10, v11
+; CGP-NEXT:    v_cndmask_b32_e32 v6, v6, v12, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v7, v10, v7, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v10, v6, 0
+; CGP-NEXT:    v_mul_hi_u32 v6, v6, v0
+; CGP-NEXT:    v_mul_lo_u32 v11, v7, 0
+; CGP-NEXT:    v_mul_hi_u32 v7, v7, v1
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
+; CGP-NEXT:    v_mul_lo_u32 v6, v6, v2
+; CGP-NEXT:    v_mul_lo_u32 v7, v7, v3
+; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v0, v6
+; CGP-NEXT:    v_sub_i32_e32 v9, vcc, v1, v7
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v2
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v8, v2
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v6
+; CGP-NEXT:    v_sub_i32_e64 v0, s[6:7], v8, v2
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v9, v3
+; CGP-NEXT:    v_add_i32_e64 v2, s[8:9], v9, v3
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[8:9], v1, v7
+; CGP-NEXT:    v_sub_i32_e64 v1, s[10:11], v9, v3
+; CGP-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; CGP-NEXT:    s_and_b64 vcc, s[6:7], s[8:9]
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[8:9]
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
+  %r = srem <2 x i32> %x, %shl.y
+  ret <2 x i32> %r
+}
+
+define i32 @v_srem_i32_24bit(i32 %num, i32 %den) {
+; GISEL-LABEL: v_srem_i32_24bit:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
+; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
+; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
+; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GISEL-NEXT:    v_mul_lo_u32 v4, v3, v1
+; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v1
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v4, v4, v3
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v3, v3, v0
+; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v1
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v1
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v4, v1
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[6:7], v4, v1
+; GISEL-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_srem_i32_24bit:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_mov_b32 s4, 0xffffff
+; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
+; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
+; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
+; CGP-NEXT:    v_mul_lo_u32 v3, 0, v1
+; CGP-NEXT:    v_mul_lo_u32 v4, 0, v0
+; CGP-NEXT:    v_rcp_f32_e32 v2, v2
+; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v2
+; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CGP-NEXT:    v_mul_lo_u32 v5, v2, v1
+; CGP-NEXT:    v_mul_lo_u32 v6, v2, 0
+; CGP-NEXT:    v_mul_hi_u32 v7, v2, v1
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
+; CGP-NEXT:    v_sub_i32_e32 v8, vcc, 0, v5
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v5, v8, vcc
+; CGP-NEXT:    v_mul_lo_u32 v5, v3, 0
+; CGP-NEXT:    v_mul_hi_u32 v3, v3, v2
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v6, v5
+; CGP-NEXT:    v_add_i32_e64 v3, s[4:5], v5, v3
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v2, v3
+; CGP-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; CGP-NEXT:    v_mul_lo_u32 v3, v2, 0
+; CGP-NEXT:    v_mul_hi_u32 v2, v2, v0
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CGP-NEXT:    v_mul_lo_u32 v2, v2, v1
+; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v0, v2
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v1
+; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v3, v1
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v2
+; CGP-NEXT:    v_sub_i32_e64 v0, s[6:7], v3, v1
+; CGP-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v4, v0, s[4:5]
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %num.mask = and i32 %num, 16777215
+  %den.mask = and i32 %den, 16777215
+  %result = srem i32 %num.mask, %den.mask
+  ret i32 %result
+}
+
+define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
+; GISEL-LABEL: v_srem_v2i32_24bit:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
+; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
+; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
+; GISEL-NEXT:    v_and_b32_e32 v2, s4, v2
+; GISEL-NEXT:    v_and_b32_e32 v3, s4, v3
+; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
+; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v4
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v5
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v7, v3
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v6
+; GISEL-NEXT:    v_mul_f32_e32 v7, 0x4f800000, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v2
+; GISEL-NEXT:    v_mul_hi_u32 v9, v6, v2
+; GISEL-NEXT:    v_mul_lo_u32 v10, v7, v3
+; GISEL-NEXT:    v_mul_hi_u32 v11, v7, v3
+; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, 0, v8
+; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, v10, v13, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v6
+; GISEL-NEXT:    v_mul_hi_u32 v9, v9, v7
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v6, v8
+; GISEL-NEXT:    v_sub_i32_e64 v6, s[6:7], v6, v8
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v7, v9
+; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v7, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v6, v6, v0
+; GISEL-NEXT:    v_mul_hi_u32 v7, v7, v1
+; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v2
+; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v0, v6
+; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v1, v7
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v2
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v8, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v6
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[6:7], v8, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v9, v3
+; GISEL-NEXT:    v_add_i32_e64 v2, s[8:9], v9, v3
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v1, v7
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[10:11], v9, v3
+; GISEL-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; GISEL-NEXT:    s_and_b64 vcc, s[6:7], s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[8:9]
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_srem_v2i32_24bit:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_mov_b32 s4, 0xffffff
+; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
+; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
+; CGP-NEXT:    v_and_b32_e32 v2, s4, v2
+; CGP-NEXT:    v_and_b32_e32 v3, s4, v3
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
+; CGP-NEXT:    v_mul_lo_u32 v5, 0, v2
+; CGP-NEXT:    v_mul_lo_u32 v6, 0, v0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
+; CGP-NEXT:    v_mul_lo_u32 v8, 0, v3
+; CGP-NEXT:    v_mul_lo_u32 v9, 0, v1
+; CGP-NEXT:    v_rcp_f32_e32 v4, v4
+; CGP-NEXT:    v_rcp_f32_e32 v7, v7
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f800000, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, v4, v2
+; CGP-NEXT:    v_mul_lo_u32 v11, v4, 0
+; CGP-NEXT:    v_mul_hi_u32 v12, v4, v2
+; CGP-NEXT:    v_mul_lo_u32 v13, v7, v3
+; CGP-NEXT:    v_mul_lo_u32 v14, v7, 0
+; CGP-NEXT:    v_mul_hi_u32 v15, v7, v3
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
+; CGP-NEXT:    v_sub_i32_e32 v16, vcc, 0, v10
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v14
+; CGP-NEXT:    v_sub_i32_e32 v17, vcc, 0, v13
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v15
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v10, v16, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v8, v13, v17, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v10, v5, 0
+; CGP-NEXT:    v_mul_hi_u32 v5, v5, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, v8, 0
+; CGP-NEXT:    v_mul_hi_u32 v8, v8, v7
+; CGP-NEXT:    v_add_i32_e64 v10, s[6:7], v11, v10
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v14, v12
+; CGP-NEXT:    v_add_i32_e64 v5, s[6:7], v10, v5
+; CGP-NEXT:    v_add_i32_e64 v8, s[6:7], v11, v8
+; CGP-NEXT:    v_add_i32_e64 v10, s[6:7], v4, v5
+; CGP-NEXT:    v_sub_i32_e64 v4, s[6:7], v4, v5
+; CGP-NEXT:    v_add_i32_e64 v5, s[6:7], v7, v8
+; CGP-NEXT:    v_sub_i32_e64 v7, s[6:7], v7, v8
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v5, v7, v5, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v7, v4, 0
+; CGP-NEXT:    v_mul_hi_u32 v4, v4, v0
+; CGP-NEXT:    v_mul_lo_u32 v8, v5, 0
+; CGP-NEXT:    v_mul_hi_u32 v5, v5, v1
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v8
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; CGP-NEXT:    v_mul_lo_u32 v4, v4, v2
+; CGP-NEXT:    v_mul_lo_u32 v5, v5, v3
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v0, v4
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v5
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v6, v2
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v6, v2
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
+; CGP-NEXT:    v_sub_i32_e64 v0, s[6:7], v6, v2
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v7, v3
+; CGP-NEXT:    v_add_i32_e64 v2, s[8:9], v7, v3
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[8:9], v1, v5
+; CGP-NEXT:    v_sub_i32_e64 v1, s[10:11], v7, v3
+; CGP-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; CGP-NEXT:    s_and_b64 vcc, s[6:7], s[8:9]
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v8, v0, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[8:9]
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
+  %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
+  %result = srem <2 x i32> %num.mask, %den.mask
+  ret <2 x i32> %result
+}

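For readers skimming the checks: most of these functions exercise the same basic shape of expansion. The sign handling happens up front with v_ashrrev_i32 / v_add_i32 / v_xor_b32, the unsigned remainder is then computed with the reciprocal-based sequence (v_rcp_iflag_f32 or v_rcp_f32 followed by the compare/select fixups), and the result's sign is restored by the trailing v_xor_b32 / v_sub_i32. Below is a minimal, illustrative LLVM IR sketch of the textbook signed-to-unsigned reduction this is built on; the value names are made up, a plain urem stands in for the open-coded reciprocal sequence, and it shows the general idea rather than a transcription of the emitted code.

define i32 @srem_via_urem_sketch(i32 %num, i32 %den) {
  ; sign mask: all ones if the operand is negative, zero otherwise
  %num.sign = ashr i32 %num, 31
  %den.sign = ashr i32 %den, 31
  ; |x| = (x + sign) ^ sign
  %num.tmp = add i32 %num, %num.sign
  %num.abs = xor i32 %num.tmp, %num.sign
  %den.tmp = add i32 %den, %den.sign
  %den.abs = xor i32 %den.tmp, %den.sign
  ; unsigned remainder; the generated code open-codes this part
  %rem.abs = urem i32 %num.abs, %den.abs
  ; srem takes the sign of the numerator: r = (|r| ^ sign) - sign
  %rem.tmp = xor i32 %rem.abs, %num.sign
  %rem = sub i32 %rem.tmp, %num.sign
  ret i32 %rem
}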



More information about the llvm-commits mailing list