[llvm] a448670 - AMDGPU/GlobalISel: Legalize 64-bit G_SDIV/G_SREM

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 24 08:47:45 PDT 2020


Author: Matt Arsenault
Date: 2020-06-24T11:39:45-04:00
New Revision: a44867075201b6aa3355f63445d81d66ef9daa50

URL: https://github.com/llvm/llvm-project/commit/a44867075201b6aa3355f63445d81d66ef9daa50
DIFF: https://github.com/llvm/llvm-project/commit/a44867075201b6aa3355f63445d81d66ef9daa50.diff

LOG: AMDGPU/GlobalISel: Legalize 64-bit G_SDIV/G_SREM

Now all the divisions should be complete, although we should fix
emitting the entire common part for div/rem when you use both.

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 91ef33ad201b..c7d13c6d559e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2678,15 +2678,14 @@ static std::pair<Register, Register> emitReciprocalU64(MachineIRBuilder &B,
   return {ResultLo.getReg(0), ResultHi.getReg(0)};
 }
 
-bool AMDGPULegalizerInfo::legalizeUDIV_UREM64(MachineInstr &MI,
-                                              MachineRegisterInfo &MRI,
-                                              MachineIRBuilder &B) const {
-  const bool IsDiv = MI.getOpcode() == TargetOpcode::G_UDIV;
+void AMDGPULegalizerInfo::legalizeUDIV_UREM64Impl(MachineIRBuilder &B,
+                                                  Register DstReg,
+                                                  Register Numer,
+                                                  Register Denom,
+                                                  bool IsDiv) const {
   const LLT S32 = LLT::scalar(32);
   const LLT S64 = LLT::scalar(64);
   const LLT S1 = LLT::scalar(1);
-  Register Numer = MI.getOperand(1).getReg();
-  Register Denom = MI.getOperand(2).getReg();
   Register RcpLo, RcpHi;
 
   std::tie(RcpLo, RcpHi) = emitReciprocalU64(B, Denom);
@@ -2782,75 +2781,84 @@ bool AMDGPULegalizerInfo::legalizeUDIV_UREM64(MachineInstr &MI,
   if (IsDiv) {
     auto Sel1 = B.buildSelect(
         S64, B.buildICmp(CmpInst::ICMP_NE, S1, C6, Zero32), Add4, Add3);
-    B.buildSelect(MI.getOperand(0),
+    B.buildSelect(DstReg,
                   B.buildICmp(CmpInst::ICMP_NE, S1, C3, Zero32), Sel1, MulHi3);
   } else {
     auto Sel2 = B.buildSelect(
         S64, B.buildICmp(CmpInst::ICMP_NE, S1, C6, Zero32), Sub3, Sub2);
-    B.buildSelect(MI.getOperand(0),
+    B.buildSelect(DstReg,
                   B.buildICmp(CmpInst::ICMP_NE, S1, C3, Zero32), Sel2, Sub1);
   }
-
-  MI.eraseFromParent();
-  return true;
 }
 
 bool AMDGPULegalizerInfo::legalizeUDIV_UREM(MachineInstr &MI,
                                             MachineRegisterInfo &MRI,
                                             MachineIRBuilder &B) const {
-  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
-  if (Ty == LLT::scalar(32))
-    return legalizeUDIV_UREM32(MI, MRI, B);
-  if (Ty == LLT::scalar(64))
-    return legalizeUDIV_UREM64(MI, MRI, B);
-  return false;
+  const LLT S64 = LLT::scalar(64);
+  const LLT S32 = LLT::scalar(32);
+  const bool IsDiv = MI.getOpcode() == AMDGPU::G_UDIV;
+  Register DstReg = MI.getOperand(0).getReg();
+  Register Num = MI.getOperand(1).getReg();
+  Register Den = MI.getOperand(2).getReg();
+  LLT Ty = MRI.getType(DstReg);
+
+  if (Ty == S32)
+    legalizeUDIV_UREM32Impl(B, DstReg, Num, Den, IsDiv);
+  else if (Ty == S64)
+    legalizeUDIV_UREM64Impl(B, DstReg, Num, Den, IsDiv);
+  else
+    return false;
+
+  MI.eraseFromParent();
+  return true;
+
 }
 
-bool AMDGPULegalizerInfo::legalizeSDIV_SREM32(MachineInstr &MI,
-                                              MachineRegisterInfo &MRI,
-                                              MachineIRBuilder &B) const {
+bool AMDGPULegalizerInfo::legalizeSDIV_SREM(MachineInstr &MI,
+                                            MachineRegisterInfo &MRI,
+                                            MachineIRBuilder &B) const {
+  const LLT S64 = LLT::scalar(64);
   const LLT S32 = LLT::scalar(32);
 
-  const bool IsDiv = MI.getOpcode() == AMDGPU::G_SDIV;
   Register DstReg = MI.getOperand(0).getReg();
+  const LLT Ty = MRI.getType(DstReg);
+  if (Ty != S32 && Ty != S64)
+    return false;
+
+  const bool IsDiv = MI.getOpcode() == AMDGPU::G_SDIV;
+
   Register LHS = MI.getOperand(1).getReg();
   Register RHS = MI.getOperand(2).getReg();
 
-  auto ThirtyOne = B.buildConstant(S32, 31);
-  auto LHSign = B.buildAShr(S32, LHS, ThirtyOne);
-  auto RHSign = B.buildAShr(S32, RHS, ThirtyOne);
+  auto SignBitOffset = B.buildConstant(S32, Ty.getSizeInBits() - 1);
+  auto LHSign = B.buildAShr(Ty, LHS, SignBitOffset);
+  auto RHSign = B.buildAShr(Ty, RHS, SignBitOffset);
 
-  LHS = B.buildAdd(S32, LHS, LHSign).getReg(0);
-  RHS = B.buildAdd(S32, RHS, RHSign).getReg(0);
+  LHS = B.buildAdd(Ty, LHS, LHSign).getReg(0);
+  RHS = B.buildAdd(Ty, RHS, RHSign).getReg(0);
 
-  LHS = B.buildXor(S32, LHS, LHSign).getReg(0);
-  RHS = B.buildXor(S32, RHS, RHSign).getReg(0);
+  LHS = B.buildXor(Ty, LHS, LHSign).getReg(0);
+  RHS = B.buildXor(Ty, RHS, RHSign).getReg(0);
 
-  Register UDivRem = MRI.createGenericVirtualRegister(S32);
-  legalizeUDIV_UREM32Impl(B, UDivRem, LHS, RHS, IsDiv);
+  Register UDivRem = MRI.createGenericVirtualRegister(Ty);
+  if (Ty == S32)
+    legalizeUDIV_UREM32Impl(B, UDivRem, LHS, RHS, IsDiv);
+  else
+    legalizeUDIV_UREM64Impl(B, UDivRem, LHS, RHS, IsDiv);
 
-  if (IsDiv) {
-    auto DSign = B.buildXor(S32, LHSign, RHSign);
-    UDivRem = B.buildXor(S32, UDivRem, DSign).getReg(0);
-    B.buildSub(DstReg, UDivRem, DSign);
-  } else {
-    auto RSign = LHSign; // Remainder sign is the same as LHS
-    UDivRem = B.buildXor(S32, UDivRem, RSign).getReg(0);
-    B.buildSub(DstReg, UDivRem, RSign);
-  }
+  Register Sign;
+  if (IsDiv)
+    Sign = B.buildXor(Ty, LHSign, RHSign).getReg(0);
+  else
+    Sign = LHSign.getReg(0); // Remainder sign is the same as LHS
+
+  UDivRem = B.buildXor(Ty, UDivRem, Sign).getReg(0);
+  B.buildSub(DstReg, UDivRem, Sign);
 
   MI.eraseFromParent();
   return true;
 }
 
-bool AMDGPULegalizerInfo::legalizeSDIV_SREM(MachineInstr &MI,
-                                            MachineRegisterInfo &MRI,
-                                            MachineIRBuilder &B) const {
-  if (MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32))
-    return legalizeSDIV_SREM32(MI, MRI, B);
-  return false;
-}
-
 bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
                                                  MachineRegisterInfo &MRI,
                                                  MachineIRBuilder &B) const {

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 40367df0e315..ce32bbf76b34 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -111,6 +111,10 @@ class AMDGPULegalizerInfo : public LegalizerInfo {
   bool legalizeSDIV_SREM32(MachineInstr &MI, MachineRegisterInfo &MRI,
                            MachineIRBuilder &B) const;
 
+  void legalizeUDIV_UREM64Impl(MachineIRBuilder &B,
+                               Register DstReg, Register Numer, Register Denom,
+                               bool IsDiv) const;
+
   bool legalizeUDIV_UREM64(MachineInstr &MI, MachineRegisterInfo &MRI,
                            MachineIRBuilder &B) const;
   bool legalizeSDIV_SREM(MachineInstr &MI, MachineRegisterInfo &MRI,

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
index 8a569c68b1f3..1e821b69b1ab 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
@@ -384,18 +384,528 @@ body: |
     ; GFX6-LABEL: name: test_sdiv_s64
     ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX6: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[COPY]], [[COPY1]]
-    ; GFX6: $vgpr0_vgpr1 = COPY [[SDIV]](s64)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32)
+    ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
+    ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
+    ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]]
+    ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX6: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32)
+    ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
+    ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]]
+    ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]]
+    ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]]
+    ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]]
+    ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]]
+    ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]]
+    ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]]
+    ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
+    ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]]
+    ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
+    ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]]
+    ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
+    ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]]
+    ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]]
+    ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]]
+    ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD16]](s32)
+    ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
+    ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
+    ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]]
+    ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
+    ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
+    ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]]
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]]
+    ; GFX6: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
+    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]]
+    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]]
+    ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]]
+    ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX6: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX6: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX6: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]]
+    ; GFX6: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV23]], [[UADDO39]]
+    ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32)
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]]
+    ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]]
+    ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX6: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX6: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]]
+    ; GFX6: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[UV25]], [[UADDO41]]
+    ; GFX6: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32)
+    ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]]
+    ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]]
+    ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX6: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX6: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX6: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX6: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX6: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]]
+    ; GFX6: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]]
+    ; GFX6: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX6: $vgpr0_vgpr1 = COPY [[MV5]](s64)
     ; GFX8-LABEL: name: test_sdiv_s64
     ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX8: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[COPY]], [[COPY1]]
-    ; GFX8: $vgpr0_vgpr1 = COPY [[SDIV]](s64)
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32)
+    ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
+    ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
+    ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]]
+    ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX8: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX8: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32)
+    ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
+    ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]]
+    ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]]
+    ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]]
+    ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]]
+    ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]]
+    ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]]
+    ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]]
+    ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
+    ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]]
+    ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
+    ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]]
+    ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
+    ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]]
+    ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX8: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]]
+    ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]]
+    ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD16]](s32)
+    ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
+    ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
+    ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]]
+    ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
+    ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
+    ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]]
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]]
+    ; GFX8: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
+    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]]
+    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]]
+    ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]]
+    ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX8: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX8: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX8: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]]
+    ; GFX8: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV23]], [[UADDO39]]
+    ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32)
+    ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]]
+    ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]]
+    ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX8: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX8: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]]
+    ; GFX8: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[UV25]], [[UADDO41]]
+    ; GFX8: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32)
+    ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]]
+    ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]]
+    ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX8: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX8: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX8: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX8: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX8: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]]
+    ; GFX8: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]]
+    ; GFX8: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[MV5]](s64)
     ; GFX9-LABEL: name: test_sdiv_s64
     ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[COPY]], [[COPY1]]
-    ; GFX9: $vgpr0_vgpr1 = COPY [[SDIV]](s64)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32)
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
+    ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
+    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]]
+    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX9: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX9: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32)
+    ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
+    ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]]
+    ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]]
+    ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]]
+    ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]]
+    ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]]
+    ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]]
+    ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]]
+    ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
+    ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]]
+    ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
+    ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]]
+    ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
+    ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]]
+    ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX9: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]]
+    ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]]
+    ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD16]](s32)
+    ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
+    ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
+    ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]]
+    ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
+    ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
+    ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]]
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]]
+    ; GFX9: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
+    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]]
+    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]]
+    ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]]
+    ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX9: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]]
+    ; GFX9: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV23]], [[UADDO39]]
+    ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32)
+    ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]]
+    ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]]
+    ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX9: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX9: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]]
+    ; GFX9: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[UV25]], [[UADDO41]]
+    ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32)
+    ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]]
+    ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]]
+    ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX9: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX9: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX9: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX9: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX9: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]]
+    ; GFX9: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]]
+    ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[MV5]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = G_SDIV %0, %1
@@ -413,27 +923,1035 @@ body: |
     ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
     ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; GFX6: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[UV]], [[UV2]]
-    ; GFX6: [[SDIV1:%[0-9]+]]:_(s64) = G_SDIV [[UV1]], [[UV3]]
-    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SDIV]](s64), [[SDIV1]](s64)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32)
+    ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]]
+    ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]]
+    ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]]
+    ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX6: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32)
+    ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]]
+    ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]]
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]]
+    ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]]
+    ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]]
+    ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]]
+    ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]]
+    ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]]
+    ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]]
+    ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX6: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]]
+    ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE8]]
+    ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]]
+    ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE8]]
+    ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]]
+    ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE8]]
+    ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]]
+    ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE8]]
+    ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD16]](s32)
+    ; GFX6: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]]
+    ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]]
+    ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD16]]
+    ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]]
+    ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]]
+    ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD18]], [[USUBO3]]
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD18]]
+    ; GFX6: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]]
+    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]]
+    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]]
+    ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]]
+    ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX6: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX6: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX6: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV26]]
+    ; GFX6: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV27]], [[UADDO39]]
+    ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32)
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]]
+    ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]]
+    ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX6: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX6: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV28]]
+    ; GFX6: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[UV29]], [[UADDO41]]
+    ; GFX6: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32)
+    ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]]
+    ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]]
+    ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX6: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX6: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX6: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX6: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX6: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV30]], [[UV32]]
+    ; GFX6: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV31]], [[UV33]], [[USUBO9]]
+    ; GFX6: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX6: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX6: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32)
+    ; GFX6: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX6: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
+    ; GFX6: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]]
+    ; GFX6: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO43]]
+    ; GFX6: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE14]](s32)
+    ; GFX6: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX6: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64)
+    ; GFX6: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV38]], [[UV40]]
+    ; GFX6: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[UV39]], [[UV41]], [[UADDO45]]
+    ; GFX6: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE16]](s32)
+    ; GFX6: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]]
+    ; GFX6: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]]
+    ; GFX6: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX6: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV42]](s32)
+    ; GFX6: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV43]](s32)
+    ; GFX6: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C8]]
+    ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]]
+    ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32)
+    ; GFX6: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX6: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C9]]
+    ; GFX6: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX6: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C10]]
+    ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]]
+    ; GFX6: [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX6: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C11]]
+    ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]]
+    ; GFX6: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32)
+    ; GFX6: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
+    ; GFX6: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX6: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX6: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV46]]
+    ; GFX6: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV47]], [[USUBO11]]
+    ; GFX6: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI2]]
+    ; GFX6: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]]
+    ; GFX6: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]]
+    ; GFX6: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]]
+    ; GFX6: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX6: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]]
+    ; GFX6: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
+    ; GFX6: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]]
+    ; GFX6: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
+    ; GFX6: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX6: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
+    ; GFX6: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]]
+    ; GFX6: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
+    ; GFX6: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX6: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]]
+    ; GFX6: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
+    ; GFX6: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]]
+    ; GFX6: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX6: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
+    ; GFX6: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]]
+    ; GFX6: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
+    ; GFX6: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
+    ; GFX6: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD21]]
+    ; GFX6: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
+    ; GFX6: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]]
+    ; GFX6: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]]
+    ; GFX6: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]]
+    ; GFX6: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]]
+    ; GFX6: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO57]]
+    ; GFX6: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]]
+    ; GFX6: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO56]]
+    ; GFX6: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO56]]
+    ; GFX6: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE18]]
+    ; GFX6: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO56]]
+    ; GFX6: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX6: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]]
+    ; GFX6: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE18]], [[MUL24]]
+    ; GFX6: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD27]]
+    ; GFX6: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[MUL24]]
+    ; GFX6: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX6: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
+    ; GFX6: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH21]]
+    ; GFX6: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
+    ; GFX6: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
+    ; GFX6: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE18]], [[ADD27]]
+    ; GFX6: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE18]], [[MUL24]]
+    ; GFX6: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD27]]
+    ; GFX6: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX6: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
+    ; GFX6: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH23]]
+    ; GFX6: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
+    ; GFX6: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
+    ; GFX6: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD28]]
+    ; GFX6: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
+    ; GFX6: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]]
+    ; GFX6: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE18]], [[ADD27]]
+    ; GFX6: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]]
+    ; GFX6: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]]
+    ; GFX6: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO57]]
+    ; GFX6: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDO69]]
+    ; GFX6: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX6: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX6: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO68]]
+    ; GFX6: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE22]]
+    ; GFX6: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO68]]
+    ; GFX6: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX6: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
+    ; GFX6: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH25]]
+    ; GFX6: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1)
+    ; GFX6: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
+    ; GFX6: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE22]]
+    ; GFX6: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO68]]
+    ; GFX6: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE22]]
+    ; GFX6: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX6: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1)
+    ; GFX6: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH27]]
+    ; GFX6: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1)
+    ; GFX6: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
+    ; GFX6: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD32]]
+    ; GFX6: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1)
+    ; GFX6: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]]
+    ; GFX6: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE22]]
+    ; GFX6: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]]
+    ; GFX6: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD35]](s32)
+    ; GFX6: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX6: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[UADDO78]]
+    ; GFX6: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV53]], [[UADDO78]]
+    ; GFX6: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[ADD35]]
+    ; GFX6: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV52]], [[UADDO78]]
+    ; GFX6: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX6: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]]
+    ; GFX6: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[MUL33]]
+    ; GFX6: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[ADD37]], [[USUBO13]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[ADD37]]
+    ; GFX6: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX6: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV55]]
+    ; GFX6: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
+    ; GFX6: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]]
+    ; GFX6: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1)
+    ; GFX6: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV55]]
+    ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]]
+    ; GFX6: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV54]]
+    ; GFX6: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO13]]
+    ; GFX6: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]]
+    ; GFX6: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX6: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV56]]
+    ; GFX6: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[ADD35]], [[UV57]], [[UADDO81]]
+    ; GFX6: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE24]](s32)
+    ; GFX6: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV55]]
+    ; GFX6: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
+    ; GFX6: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV54]]
+    ; GFX6: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1)
+    ; GFX6: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV55]]
+    ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
+    ; GFX6: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX6: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV58]]
+    ; GFX6: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[UV59]], [[UADDO83]]
+    ; GFX6: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE26]](s32)
+    ; GFX6: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV54]]
+    ; GFX6: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV55]], [[USUBO15]]
+    ; GFX6: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]]
+    ; GFX6: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]]
+    ; GFX6: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]]
+    ; GFX6: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]]
+    ; GFX6: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]]
+    ; GFX6: [[XOR6:%[0-9]+]]:_(s64) = G_XOR [[ASHR2]], [[ASHR3]]
+    ; GFX6: [[XOR7:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[XOR6]]
+    ; GFX6: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR7]](s64)
+    ; GFX6: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR6]](s64)
+    ; GFX6: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV60]], [[UV62]]
+    ; GFX6: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV61]], [[UV63]], [[USUBO19]]
+    ; GFX6: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32)
+    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64)
     ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX8-LABEL: name: test_sdiv_v2s64
     ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
     ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; GFX8: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[UV]], [[UV2]]
-    ; GFX8: [[SDIV1:%[0-9]+]]:_(s64) = G_SDIV [[UV1]], [[UV3]]
-    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SDIV]](s64), [[SDIV1]](s64)
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32)
+    ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]]
+    ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]]
+    ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]]
+    ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX8: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX8: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32)
+    ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]]
+    ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]]
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]]
+    ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]]
+    ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]]
+    ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]]
+    ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]]
+    ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]]
+    ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]]
+    ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX8: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]]
+    ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE8]]
+    ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]]
+    ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE8]]
+    ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]]
+    ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE8]]
+    ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX8: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]]
+    ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE8]]
+    ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD16]](s32)
+    ; GFX8: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]]
+    ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]]
+    ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD16]]
+    ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]]
+    ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]]
+    ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD18]], [[USUBO3]]
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD18]]
+    ; GFX8: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]]
+    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]]
+    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]]
+    ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]]
+    ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX8: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX8: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX8: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV26]]
+    ; GFX8: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV27]], [[UADDO39]]
+    ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32)
+    ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]]
+    ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]]
+    ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX8: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX8: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV28]]
+    ; GFX8: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[UV29]], [[UADDO41]]
+    ; GFX8: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32)
+    ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]]
+    ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]]
+    ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX8: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX8: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX8: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX8: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX8: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV30]], [[UV32]]
+    ; GFX8: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV31]], [[UV33]], [[USUBO9]]
+    ; GFX8: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX8: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX8: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32)
+    ; GFX8: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX8: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
+    ; GFX8: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]]
+    ; GFX8: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO43]]
+    ; GFX8: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE14]](s32)
+    ; GFX8: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX8: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64)
+    ; GFX8: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV38]], [[UV40]]
+    ; GFX8: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[UV39]], [[UV41]], [[UADDO45]]
+    ; GFX8: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE16]](s32)
+    ; GFX8: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]]
+    ; GFX8: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]]
+    ; GFX8: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX8: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV42]](s32)
+    ; GFX8: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV43]](s32)
+    ; GFX8: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C8]]
+    ; GFX8: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]]
+    ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32)
+    ; GFX8: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX8: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C9]]
+    ; GFX8: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX8: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C10]]
+    ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]]
+    ; GFX8: [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX8: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C11]]
+    ; GFX8: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]]
+    ; GFX8: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32)
+    ; GFX8: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
+    ; GFX8: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX8: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX8: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV46]]
+    ; GFX8: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV47]], [[USUBO11]]
+    ; GFX8: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI2]]
+    ; GFX8: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]]
+    ; GFX8: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]]
+    ; GFX8: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]]
+    ; GFX8: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX8: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]]
+    ; GFX8: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
+    ; GFX8: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]]
+    ; GFX8: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
+    ; GFX8: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX8: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
+    ; GFX8: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]]
+    ; GFX8: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
+    ; GFX8: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX8: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]]
+    ; GFX8: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
+    ; GFX8: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]]
+    ; GFX8: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX8: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
+    ; GFX8: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]]
+    ; GFX8: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
+    ; GFX8: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
+    ; GFX8: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD21]]
+    ; GFX8: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
+    ; GFX8: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]]
+    ; GFX8: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]]
+    ; GFX8: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]]
+    ; GFX8: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]]
+    ; GFX8: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO57]]
+    ; GFX8: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]]
+    ; GFX8: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO56]]
+    ; GFX8: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO56]]
+    ; GFX8: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE18]]
+    ; GFX8: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO56]]
+    ; GFX8: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX8: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]]
+    ; GFX8: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE18]], [[MUL24]]
+    ; GFX8: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD27]]
+    ; GFX8: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[MUL24]]
+    ; GFX8: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX8: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
+    ; GFX8: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH21]]
+    ; GFX8: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
+    ; GFX8: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
+    ; GFX8: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE18]], [[ADD27]]
+    ; GFX8: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE18]], [[MUL24]]
+    ; GFX8: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD27]]
+    ; GFX8: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX8: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
+    ; GFX8: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH23]]
+    ; GFX8: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
+    ; GFX8: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
+    ; GFX8: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD28]]
+    ; GFX8: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
+    ; GFX8: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]]
+    ; GFX8: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE18]], [[ADD27]]
+    ; GFX8: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]]
+    ; GFX8: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]]
+    ; GFX8: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO57]]
+    ; GFX8: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDO69]]
+    ; GFX8: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX8: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX8: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO68]]
+    ; GFX8: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE22]]
+    ; GFX8: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO68]]
+    ; GFX8: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX8: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
+    ; GFX8: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH25]]
+    ; GFX8: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1)
+    ; GFX8: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
+    ; GFX8: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE22]]
+    ; GFX8: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO68]]
+    ; GFX8: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE22]]
+    ; GFX8: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX8: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1)
+    ; GFX8: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH27]]
+    ; GFX8: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1)
+    ; GFX8: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
+    ; GFX8: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD32]]
+    ; GFX8: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1)
+    ; GFX8: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]]
+    ; GFX8: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE22]]
+    ; GFX8: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]]
+    ; GFX8: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD35]](s32)
+    ; GFX8: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX8: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[UADDO78]]
+    ; GFX8: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV53]], [[UADDO78]]
+    ; GFX8: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[ADD35]]
+    ; GFX8: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV52]], [[UADDO78]]
+    ; GFX8: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX8: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]]
+    ; GFX8: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[MUL33]]
+    ; GFX8: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[ADD37]], [[USUBO13]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[ADD37]]
+    ; GFX8: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX8: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV55]]
+    ; GFX8: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
+    ; GFX8: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]]
+    ; GFX8: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1)
+    ; GFX8: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV55]]
+    ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]]
+    ; GFX8: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV54]]
+    ; GFX8: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO13]]
+    ; GFX8: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]]
+    ; GFX8: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX8: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV56]]
+    ; GFX8: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[ADD35]], [[UV57]], [[UADDO81]]
+    ; GFX8: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE24]](s32)
+    ; GFX8: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV55]]
+    ; GFX8: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
+    ; GFX8: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV54]]
+    ; GFX8: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1)
+    ; GFX8: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV55]]
+    ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
+    ; GFX8: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX8: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV58]]
+    ; GFX8: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[UV59]], [[UADDO83]]
+    ; GFX8: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE26]](s32)
+    ; GFX8: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV54]]
+    ; GFX8: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV55]], [[USUBO15]]
+    ; GFX8: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]]
+    ; GFX8: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]]
+    ; GFX8: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]]
+    ; GFX8: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]]
+    ; GFX8: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]]
+    ; GFX8: [[XOR6:%[0-9]+]]:_(s64) = G_XOR [[ASHR2]], [[ASHR3]]
+    ; GFX8: [[XOR7:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[XOR6]]
+    ; GFX8: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR7]](s64)
+    ; GFX8: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR6]](s64)
+    ; GFX8: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV60]], [[UV62]]
+    ; GFX8: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV61]], [[UV63]], [[USUBO19]]
+    ; GFX8: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32)
+    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64)
     ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX9-LABEL: name: test_sdiv_v2s64
     ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
     ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; GFX9: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[UV]], [[UV2]]
-    ; GFX9: [[SDIV1:%[0-9]+]]:_(s64) = G_SDIV [[UV1]], [[UV3]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SDIV]](s64), [[SDIV1]](s64)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32)
+    ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]]
+    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]]
+    ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]]
+    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX9: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX9: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32)
+    ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]]
+    ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]]
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]]
+    ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]]
+    ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]]
+    ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]]
+    ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]]
+    ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]]
+    ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]]
+    ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX9: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]]
+    ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE8]]
+    ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]]
+    ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE8]]
+    ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]]
+    ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE8]]
+    ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX9: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]]
+    ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE8]]
+    ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD16]](s32)
+    ; GFX9: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]]
+    ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]]
+    ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD16]]
+    ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]]
+    ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]]
+    ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD18]], [[USUBO3]]
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD18]]
+    ; GFX9: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]]
+    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]]
+    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]]
+    ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]]
+    ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX9: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV26]]
+    ; GFX9: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV27]], [[UADDO39]]
+    ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32)
+    ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]]
+    ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]]
+    ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX9: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX9: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV28]]
+    ; GFX9: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[UV29]], [[UADDO41]]
+    ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32)
+    ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]]
+    ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]]
+    ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX9: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX9: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX9: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX9: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX9: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV30]], [[UV32]]
+    ; GFX9: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV31]], [[UV33]], [[USUBO9]]
+    ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX9: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX9: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32)
+    ; GFX9: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX9: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
+    ; GFX9: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]]
+    ; GFX9: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO43]]
+    ; GFX9: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE14]](s32)
+    ; GFX9: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX9: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64)
+    ; GFX9: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV38]], [[UV40]]
+    ; GFX9: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[UV39]], [[UV41]], [[UADDO45]]
+    ; GFX9: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE16]](s32)
+    ; GFX9: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]]
+    ; GFX9: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]]
+    ; GFX9: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX9: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV42]](s32)
+    ; GFX9: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV43]](s32)
+    ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C8]]
+    ; GFX9: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]]
+    ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32)
+    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX9: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C9]]
+    ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX9: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C10]]
+    ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]]
+    ; GFX9: [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX9: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C11]]
+    ; GFX9: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]]
+    ; GFX9: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32)
+    ; GFX9: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
+    ; GFX9: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX9: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX9: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV46]]
+    ; GFX9: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV47]], [[USUBO11]]
+    ; GFX9: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI2]]
+    ; GFX9: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]]
+    ; GFX9: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]]
+    ; GFX9: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]]
+    ; GFX9: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX9: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]]
+    ; GFX9: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
+    ; GFX9: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]]
+    ; GFX9: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
+    ; GFX9: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX9: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
+    ; GFX9: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]]
+    ; GFX9: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
+    ; GFX9: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX9: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]]
+    ; GFX9: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
+    ; GFX9: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]]
+    ; GFX9: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX9: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
+    ; GFX9: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]]
+    ; GFX9: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
+    ; GFX9: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
+    ; GFX9: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD21]]
+    ; GFX9: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
+    ; GFX9: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]]
+    ; GFX9: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]]
+    ; GFX9: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]]
+    ; GFX9: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]]
+    ; GFX9: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO57]]
+    ; GFX9: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]]
+    ; GFX9: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO56]]
+    ; GFX9: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO56]]
+    ; GFX9: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE18]]
+    ; GFX9: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO56]]
+    ; GFX9: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX9: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]]
+    ; GFX9: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE18]], [[MUL24]]
+    ; GFX9: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD27]]
+    ; GFX9: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[MUL24]]
+    ; GFX9: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX9: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
+    ; GFX9: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH21]]
+    ; GFX9: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
+    ; GFX9: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
+    ; GFX9: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE18]], [[ADD27]]
+    ; GFX9: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE18]], [[MUL24]]
+    ; GFX9: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD27]]
+    ; GFX9: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX9: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
+    ; GFX9: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH23]]
+    ; GFX9: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
+    ; GFX9: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
+    ; GFX9: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD28]]
+    ; GFX9: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
+    ; GFX9: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]]
+    ; GFX9: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE18]], [[ADD27]]
+    ; GFX9: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]]
+    ; GFX9: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]]
+    ; GFX9: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO57]]
+    ; GFX9: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDO69]]
+    ; GFX9: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX9: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX9: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO68]]
+    ; GFX9: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE22]]
+    ; GFX9: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO68]]
+    ; GFX9: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX9: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
+    ; GFX9: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH25]]
+    ; GFX9: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1)
+    ; GFX9: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
+    ; GFX9: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE22]]
+    ; GFX9: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO68]]
+    ; GFX9: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE22]]
+    ; GFX9: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX9: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1)
+    ; GFX9: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH27]]
+    ; GFX9: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1)
+    ; GFX9: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
+    ; GFX9: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD32]]
+    ; GFX9: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1)
+    ; GFX9: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]]
+    ; GFX9: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE22]]
+    ; GFX9: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]]
+    ; GFX9: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD35]](s32)
+    ; GFX9: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX9: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[UADDO78]]
+    ; GFX9: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV53]], [[UADDO78]]
+    ; GFX9: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[ADD35]]
+    ; GFX9: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV52]], [[UADDO78]]
+    ; GFX9: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX9: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]]
+    ; GFX9: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[MUL33]]
+    ; GFX9: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[ADD37]], [[USUBO13]]
+    ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[ADD37]]
+    ; GFX9: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX9: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV55]]
+    ; GFX9: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
+    ; GFX9: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]]
+    ; GFX9: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1)
+    ; GFX9: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV55]]
+    ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]]
+    ; GFX9: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV54]]
+    ; GFX9: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO13]]
+    ; GFX9: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]]
+    ; GFX9: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX9: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV56]]
+    ; GFX9: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[ADD35]], [[UV57]], [[UADDO81]]
+    ; GFX9: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE24]](s32)
+    ; GFX9: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV55]]
+    ; GFX9: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
+    ; GFX9: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV54]]
+    ; GFX9: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1)
+    ; GFX9: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV55]]
+    ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
+    ; GFX9: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX9: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV58]]
+    ; GFX9: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[UV59]], [[UADDO83]]
+    ; GFX9: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE26]](s32)
+    ; GFX9: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV54]]
+    ; GFX9: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV55]], [[USUBO15]]
+    ; GFX9: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]]
+    ; GFX9: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]]
+    ; GFX9: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]]
+    ; GFX9: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]]
+    ; GFX9: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]]
+    ; GFX9: [[XOR6:%[0-9]+]]:_(s64) = G_XOR [[ASHR2]], [[ASHR3]]
+    ; GFX9: [[XOR7:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[XOR6]]
+    ; GFX9: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR7]](s64)
+    ; GFX9: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR6]](s64)
+    ; GFX9: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV60]], [[UV62]]
+    ; GFX9: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV61]], [[UV63]], [[USUBO19]]
+    ; GFX9: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
@@ -1189,36 +2707,543 @@ body: |
     ; GFX6-LABEL: name: test_sdiv_s33
     ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX6: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64)
-    ; GFX6: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64)
-    ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s33)
-    ; GFX6: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC1]](s33)
-    ; GFX6: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[SEXT]], [[SEXT1]]
-    ; GFX6: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[SDIV]](s64)
-    ; GFX6: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33)
-    ; GFX6: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY2]], 33
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
+    ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY3]], 33
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
+    ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
+    ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
+    ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
+    ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64)
+    ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]]
+    ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX6: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32)
+    ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
+    ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]]
+    ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]]
+    ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]]
+    ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]]
+    ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]]
+    ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]]
+    ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]]
+    ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
+    ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]]
+    ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
+    ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]]
+    ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
+    ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]]
+    ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]]
+    ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]]
+    ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD16]](s32)
+    ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
+    ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
+    ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]]
+    ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
+    ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
+    ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]]
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]]
+    ; GFX6: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
+    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]]
+    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]]
+    ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]]
+    ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX6: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX6: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX6: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]]
+    ; GFX6: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV23]], [[UADDO39]]
+    ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32)
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]]
+    ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]]
+    ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX6: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX6: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]]
+    ; GFX6: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[UV25]], [[UADDO41]]
+    ; GFX6: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32)
+    ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]]
+    ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]]
+    ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX6: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX6: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX6: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX6: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX6: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]]
+    ; GFX6: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]]
+    ; GFX6: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV5]](s64)
+    ; GFX6: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
     ; GFX8-LABEL: name: test_sdiv_s33
     ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX8: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64)
-    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64)
-    ; GFX8: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s33)
-    ; GFX8: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC1]](s33)
-    ; GFX8: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[SEXT]], [[SEXT1]]
-    ; GFX8: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[SDIV]](s64)
-    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33)
-    ; GFX8: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY2]], 33
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
+    ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY3]], 33
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
+    ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
+    ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
+    ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
+    ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64)
+    ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]]
+    ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX8: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX8: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32)
+    ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
+    ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]]
+    ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]]
+    ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]]
+    ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]]
+    ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]]
+    ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]]
+    ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]]
+    ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
+    ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]]
+    ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
+    ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]]
+    ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
+    ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]]
+    ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX8: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]]
+    ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]]
+    ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD16]](s32)
+    ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
+    ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
+    ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]]
+    ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
+    ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
+    ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]]
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]]
+    ; GFX8: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
+    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]]
+    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]]
+    ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]]
+    ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX8: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX8: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX8: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]]
+    ; GFX8: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV23]], [[UADDO39]]
+    ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32)
+    ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]]
+    ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]]
+    ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX8: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX8: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]]
+    ; GFX8: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[UV25]], [[UADDO41]]
+    ; GFX8: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32)
+    ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]]
+    ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]]
+    ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX8: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX8: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX8: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX8: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX8: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]]
+    ; GFX8: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]]
+    ; GFX8: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV5]](s64)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
     ; GFX9-LABEL: name: test_sdiv_s33
     ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64)
-    ; GFX9: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s33)
-    ; GFX9: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC1]](s33)
-    ; GFX9: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[SEXT]], [[SEXT1]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[SDIV]](s64)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY2]], 33
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
+    ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY3]], 33
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
+    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
+    ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
+    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64)
+    ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]]
+    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX9: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX9: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32)
+    ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
+    ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]]
+    ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]]
+    ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]]
+    ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]]
+    ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]]
+    ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]]
+    ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]]
+    ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
+    ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]]
+    ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
+    ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]]
+    ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
+    ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]]
+    ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX9: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]]
+    ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]]
+    ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD16]](s32)
+    ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
+    ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
+    ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]]
+    ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
+    ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
+    ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]]
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]]
+    ; GFX9: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
+    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]]
+    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]]
+    ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]]
+    ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX9: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]]
+    ; GFX9: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV23]], [[UADDO39]]
+    ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32)
+    ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]]
+    ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]]
+    ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX9: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX9: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]]
+    ; GFX9: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[UV25]], [[UADDO41]]
+    ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32)
+    ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]]
+    ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]]
+    ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX9: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]]
+    ; GFX9: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]]
+    ; GFX9: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX9: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX9: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]]
+    ; GFX9: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]]
+    ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV5]](s64)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s33) = G_TRUNC %0

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir
index 0eb03135e18b..653d38f88cd3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir
@@ -369,18 +369,504 @@ body: |
     ; GFX6-LABEL: name: test_srem_s64
     ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX6: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[COPY]], [[COPY1]]
-    ; GFX6: $vgpr0_vgpr1 = COPY [[SREM]](s64)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32)
+    ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
+    ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
+    ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]]
+    ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX6: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32)
+    ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
+    ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]]
+    ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]]
+    ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]]
+    ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]]
+    ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]]
+    ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]]
+    ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]]
+    ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
+    ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]]
+    ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
+    ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]]
+    ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
+    ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]]
+    ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]]
+    ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]]
+    ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
+    ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
+    ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]]
+    ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
+    ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
+    ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]]
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]]
+    ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX6: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
+    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]]
+    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]]
+    ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]]
+    ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]]
+    ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]]
+    ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]]
+    ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]]
+    ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX6: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX6: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX6: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX6: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX6: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]]
+    ; GFX6: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]]
+    ; GFX6: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX6: $vgpr0_vgpr1 = COPY [[MV5]](s64)
     ; GFX8-LABEL: name: test_srem_s64
     ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX8: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[COPY]], [[COPY1]]
-    ; GFX8: $vgpr0_vgpr1 = COPY [[SREM]](s64)
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32)
+    ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
+    ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
+    ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]]
+    ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX8: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX8: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32)
+    ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
+    ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]]
+    ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]]
+    ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]]
+    ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]]
+    ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]]
+    ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]]
+    ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]]
+    ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
+    ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]]
+    ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
+    ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]]
+    ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
+    ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]]
+    ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX8: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]]
+    ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]]
+    ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
+    ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
+    ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]]
+    ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
+    ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
+    ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]]
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]]
+    ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX8: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
+    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]]
+    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]]
+    ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]]
+    ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]]
+    ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]]
+    ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]]
+    ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]]
+    ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX8: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX8: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX8: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX8: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX8: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]]
+    ; GFX8: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]]
+    ; GFX8: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[MV5]](s64)
     ; GFX9-LABEL: name: test_srem_s64
     ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[COPY]], [[COPY1]]
-    ; GFX9: $vgpr0_vgpr1 = COPY [[SREM]](s64)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32)
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
+    ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
+    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]]
+    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX9: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX9: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32)
+    ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
+    ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]]
+    ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]]
+    ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]]
+    ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]]
+    ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]]
+    ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]]
+    ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]]
+    ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
+    ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]]
+    ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
+    ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]]
+    ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
+    ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]]
+    ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX9: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]]
+    ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]]
+    ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
+    ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
+    ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]]
+    ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
+    ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
+    ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]]
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]]
+    ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX9: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
+    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]]
+    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]]
+    ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]]
+    ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]]
+    ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]]
+    ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]]
+    ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]]
+    ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX9: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX9: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX9: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX9: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]]
+    ; GFX9: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]]
+    ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[MV5]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = G_SREM %0, %1
@@ -398,27 +884,990 @@ body: |
     ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
     ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; GFX6: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[UV]], [[UV2]]
-    ; GFX6: [[SREM1:%[0-9]+]]:_(s64) = G_SREM [[UV1]], [[UV3]]
-    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SREM]](s64), [[SREM1]](s64)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32)
+    ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]]
+    ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]]
+    ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]]
+    ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX6: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32)
+    ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]]
+    ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]]
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]]
+    ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]]
+    ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]]
+    ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]]
+    ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]]
+    ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]]
+    ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]]
+    ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX6: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]]
+    ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE8]]
+    ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]]
+    ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE8]]
+    ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]]
+    ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE8]]
+    ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]]
+    ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE8]]
+    ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX6: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]]
+    ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]]
+    ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD16]]
+    ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]]
+    ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]]
+    ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD18]], [[USUBO3]]
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD18]]
+    ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX6: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]]
+    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]]
+    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]]
+    ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]]
+    ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]]
+    ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]]
+    ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]]
+    ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]]
+    ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX6: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX6: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX6: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX6: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX6: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]]
+    ; GFX6: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]]
+    ; GFX6: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX6: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX6: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32)
+    ; GFX6: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX6: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
+    ; GFX6: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV30]], [[UV32]]
+    ; GFX6: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV31]], [[UV33]], [[UADDO39]]
+    ; GFX6: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32)
+    ; GFX6: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX6: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64)
+    ; GFX6: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]]
+    ; GFX6: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO41]]
+    ; GFX6: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32)
+    ; GFX6: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]]
+    ; GFX6: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]]
+    ; GFX6: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX6: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV38]](s32)
+    ; GFX6: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV39]](s32)
+    ; GFX6: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C7]]
+    ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]]
+    ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32)
+    ; GFX6: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX6: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C8]]
+    ; GFX6: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX6: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C9]]
+    ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]]
+    ; GFX6: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX6: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C10]]
+    ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]]
+    ; GFX6: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32)
+    ; GFX6: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
+    ; GFX6: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX6: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX6: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV40]], [[UV42]]
+    ; GFX6: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV41]], [[UV43]], [[USUBO11]]
+    ; GFX6: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI2]]
+    ; GFX6: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]]
+    ; GFX6: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]]
+    ; GFX6: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]]
+    ; GFX6: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX6: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]]
+    ; GFX6: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
+    ; GFX6: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]]
+    ; GFX6: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
+    ; GFX6: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX6: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
+    ; GFX6: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH16]]
+    ; GFX6: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1)
+    ; GFX6: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX6: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]]
+    ; GFX6: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
+    ; GFX6: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]]
+    ; GFX6: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX6: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
+    ; GFX6: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH18]]
+    ; GFX6: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
+    ; GFX6: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
+    ; GFX6: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD21]]
+    ; GFX6: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
+    ; GFX6: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]]
+    ; GFX6: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]]
+    ; GFX6: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]]
+    ; GFX6: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]]
+    ; GFX6: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO53]]
+    ; GFX6: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]]
+    ; GFX6: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO52]]
+    ; GFX6: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]]
+    ; GFX6: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE14]]
+    ; GFX6: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO52]]
+    ; GFX6: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX6: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]]
+    ; GFX6: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE14]], [[MUL24]]
+    ; GFX6: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD27]]
+    ; GFX6: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[MUL24]]
+    ; GFX6: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX6: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
+    ; GFX6: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH21]]
+    ; GFX6: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1)
+    ; GFX6: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
+    ; GFX6: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE14]], [[ADD27]]
+    ; GFX6: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE14]], [[MUL24]]
+    ; GFX6: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD27]]
+    ; GFX6: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX6: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
+    ; GFX6: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH23]]
+    ; GFX6: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
+    ; GFX6: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
+    ; GFX6: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD28]]
+    ; GFX6: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
+    ; GFX6: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]]
+    ; GFX6: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE14]], [[ADD27]]
+    ; GFX6: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]]
+    ; GFX6: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]]
+    ; GFX6: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO53]]
+    ; GFX6: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[C6]], [[UADDO65]]
+    ; GFX6: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX6: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX6: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO64]]
+    ; GFX6: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE18]]
+    ; GFX6: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO64]]
+    ; GFX6: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX6: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
+    ; GFX6: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH25]]
+    ; GFX6: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1)
+    ; GFX6: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
+    ; GFX6: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE18]]
+    ; GFX6: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO64]]
+    ; GFX6: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE18]]
+    ; GFX6: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX6: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
+    ; GFX6: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH27]]
+    ; GFX6: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1)
+    ; GFX6: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
+    ; GFX6: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD32]]
+    ; GFX6: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1)
+    ; GFX6: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]]
+    ; GFX6: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE18]]
+    ; GFX6: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]]
+    ; GFX6: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX6: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[UADDO74]]
+    ; GFX6: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV49]], [[UADDO74]]
+    ; GFX6: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[ADD35]]
+    ; GFX6: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV48]], [[UADDO74]]
+    ; GFX6: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX6: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]]
+    ; GFX6: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[MUL33]]
+    ; GFX6: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[ADD37]], [[USUBO13]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[ADD37]]
+    ; GFX6: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32)
+    ; GFX6: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX6: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV51]]
+    ; GFX6: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
+    ; GFX6: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV50]]
+    ; GFX6: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1)
+    ; GFX6: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV51]]
+    ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]]
+    ; GFX6: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV50]]
+    ; GFX6: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV51]], [[USUBO13]]
+    ; GFX6: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]]
+    ; GFX6: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE20]](s32)
+    ; GFX6: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV51]]
+    ; GFX6: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
+    ; GFX6: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV50]]
+    ; GFX6: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1)
+    ; GFX6: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV51]]
+    ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
+    ; GFX6: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV50]]
+    ; GFX6: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV51]], [[USUBO15]]
+    ; GFX6: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]]
+    ; GFX6: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO16]](s32), [[USUBE24]](s32)
+    ; GFX6: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]]
+    ; GFX6: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]]
+    ; GFX6: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]]
+    ; GFX6: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]]
+    ; GFX6: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[ASHR2]]
+    ; GFX6: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX6: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
+    ; GFX6: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV52]], [[UV54]]
+    ; GFX6: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV53]], [[UV55]], [[USUBO19]]
+    ; GFX6: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32)
+    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64)
     ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX8-LABEL: name: test_srem_v2s64
     ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
     ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; GFX8: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[UV]], [[UV2]]
-    ; GFX8: [[SREM1:%[0-9]+]]:_(s64) = G_SREM [[UV1]], [[UV3]]
-    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SREM]](s64), [[SREM1]](s64)
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32)
+    ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]]
+    ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]]
+    ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]]
+    ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX8: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX8: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32)
+    ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]]
+    ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]]
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]]
+    ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]]
+    ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]]
+    ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]]
+    ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]]
+    ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]]
+    ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]]
+    ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX8: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]]
+    ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE8]]
+    ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]]
+    ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE8]]
+    ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]]
+    ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE8]]
+    ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX8: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]]
+    ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE8]]
+    ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX8: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]]
+    ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]]
+    ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD16]]
+    ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]]
+    ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]]
+    ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD18]], [[USUBO3]]
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD18]]
+    ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX8: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]]
+    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]]
+    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]]
+    ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]]
+    ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]]
+    ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]]
+    ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]]
+    ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]]
+    ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX8: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX8: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX8: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX8: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX8: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]]
+    ; GFX8: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]]
+    ; GFX8: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX8: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX8: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32)
+    ; GFX8: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX8: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
+    ; GFX8: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV30]], [[UV32]]
+    ; GFX8: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV31]], [[UV33]], [[UADDO39]]
+    ; GFX8: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32)
+    ; GFX8: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX8: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64)
+    ; GFX8: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]]
+    ; GFX8: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO41]]
+    ; GFX8: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32)
+    ; GFX8: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]]
+    ; GFX8: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]]
+    ; GFX8: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX8: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV38]](s32)
+    ; GFX8: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV39]](s32)
+    ; GFX8: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C7]]
+    ; GFX8: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]]
+    ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32)
+    ; GFX8: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX8: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C8]]
+    ; GFX8: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX8: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C9]]
+    ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]]
+    ; GFX8: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX8: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C10]]
+    ; GFX8: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]]
+    ; GFX8: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32)
+    ; GFX8: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
+    ; GFX8: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX8: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX8: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV40]], [[UV42]]
+    ; GFX8: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV41]], [[UV43]], [[USUBO11]]
+    ; GFX8: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI2]]
+    ; GFX8: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]]
+    ; GFX8: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]]
+    ; GFX8: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]]
+    ; GFX8: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX8: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]]
+    ; GFX8: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
+    ; GFX8: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]]
+    ; GFX8: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
+    ; GFX8: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX8: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
+    ; GFX8: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH16]]
+    ; GFX8: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1)
+    ; GFX8: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX8: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]]
+    ; GFX8: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
+    ; GFX8: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]]
+    ; GFX8: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX8: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
+    ; GFX8: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH18]]
+    ; GFX8: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
+    ; GFX8: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
+    ; GFX8: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD21]]
+    ; GFX8: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
+    ; GFX8: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]]
+    ; GFX8: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]]
+    ; GFX8: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]]
+    ; GFX8: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]]
+    ; GFX8: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO53]]
+    ; GFX8: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]]
+    ; GFX8: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO52]]
+    ; GFX8: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]]
+    ; GFX8: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE14]]
+    ; GFX8: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO52]]
+    ; GFX8: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX8: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]]
+    ; GFX8: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE14]], [[MUL24]]
+    ; GFX8: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD27]]
+    ; GFX8: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[MUL24]]
+    ; GFX8: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX8: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
+    ; GFX8: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH21]]
+    ; GFX8: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1)
+    ; GFX8: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
+    ; GFX8: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE14]], [[ADD27]]
+    ; GFX8: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE14]], [[MUL24]]
+    ; GFX8: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD27]]
+    ; GFX8: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX8: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
+    ; GFX8: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH23]]
+    ; GFX8: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
+    ; GFX8: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
+    ; GFX8: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD28]]
+    ; GFX8: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
+    ; GFX8: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]]
+    ; GFX8: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE14]], [[ADD27]]
+    ; GFX8: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]]
+    ; GFX8: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]]
+    ; GFX8: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO53]]
+    ; GFX8: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[C6]], [[UADDO65]]
+    ; GFX8: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX8: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX8: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO64]]
+    ; GFX8: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE18]]
+    ; GFX8: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO64]]
+    ; GFX8: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX8: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
+    ; GFX8: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH25]]
+    ; GFX8: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1)
+    ; GFX8: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
+    ; GFX8: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE18]]
+    ; GFX8: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO64]]
+    ; GFX8: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE18]]
+    ; GFX8: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX8: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
+    ; GFX8: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH27]]
+    ; GFX8: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1)
+    ; GFX8: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
+    ; GFX8: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD32]]
+    ; GFX8: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1)
+    ; GFX8: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]]
+    ; GFX8: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE18]]
+    ; GFX8: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]]
+    ; GFX8: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX8: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[UADDO74]]
+    ; GFX8: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV49]], [[UADDO74]]
+    ; GFX8: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[ADD35]]
+    ; GFX8: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV48]], [[UADDO74]]
+    ; GFX8: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX8: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]]
+    ; GFX8: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[MUL33]]
+    ; GFX8: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[ADD37]], [[USUBO13]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[ADD37]]
+    ; GFX8: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32)
+    ; GFX8: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX8: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV51]]
+    ; GFX8: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
+    ; GFX8: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV50]]
+    ; GFX8: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1)
+    ; GFX8: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV51]]
+    ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]]
+    ; GFX8: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV50]]
+    ; GFX8: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV51]], [[USUBO13]]
+    ; GFX8: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]]
+    ; GFX8: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE20]](s32)
+    ; GFX8: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV51]]
+    ; GFX8: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
+    ; GFX8: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV50]]
+    ; GFX8: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1)
+    ; GFX8: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV51]]
+    ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
+    ; GFX8: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV50]]
+    ; GFX8: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV51]], [[USUBO15]]
+    ; GFX8: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]]
+    ; GFX8: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO16]](s32), [[USUBE24]](s32)
+    ; GFX8: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]]
+    ; GFX8: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]]
+    ; GFX8: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]]
+    ; GFX8: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]]
+    ; GFX8: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[ASHR2]]
+    ; GFX8: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX8: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
+    ; GFX8: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV52]], [[UV54]]
+    ; GFX8: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV53]], [[UV55]], [[USUBO19]]
+    ; GFX8: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32)
+    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64)
     ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     ; GFX9-LABEL: name: test_srem_v2s64
     ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
     ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; GFX9: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[UV]], [[UV2]]
-    ; GFX9: [[SREM1:%[0-9]+]]:_(s64) = G_SREM [[UV1]], [[UV3]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SREM]](s64), [[SREM1]](s64)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32)
+    ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]]
+    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]]
+    ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]]
+    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX9: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX9: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32)
+    ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]]
+    ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]]
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]]
+    ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]]
+    ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]]
+    ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]]
+    ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]]
+    ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]]
+    ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]]
+    ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX9: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]]
+    ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE8]]
+    ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]]
+    ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE8]]
+    ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]]
+    ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE8]]
+    ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX9: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]]
+    ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE8]]
+    ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX9: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]]
+    ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]]
+    ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD16]]
+    ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]]
+    ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]]
+    ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD18]], [[USUBO3]]
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD18]]
+    ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX9: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]]
+    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]]
+    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]]
+    ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]]
+    ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]]
+    ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]]
+    ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]]
+    ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]]
+    ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX9: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX9: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX9: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX9: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]]
+    ; GFX9: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]]
+    ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX9: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX9: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32)
+    ; GFX9: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX9: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
+    ; GFX9: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV30]], [[UV32]]
+    ; GFX9: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV31]], [[UV33]], [[UADDO39]]
+    ; GFX9: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32)
+    ; GFX9: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX9: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64)
+    ; GFX9: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]]
+    ; GFX9: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO41]]
+    ; GFX9: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32)
+    ; GFX9: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]]
+    ; GFX9: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]]
+    ; GFX9: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX9: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV38]](s32)
+    ; GFX9: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV39]](s32)
+    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C7]]
+    ; GFX9: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]]
+    ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32)
+    ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX9: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C8]]
+    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX9: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C9]]
+    ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]]
+    ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX9: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C10]]
+    ; GFX9: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]]
+    ; GFX9: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32)
+    ; GFX9: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
+    ; GFX9: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX9: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX9: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV40]], [[UV42]]
+    ; GFX9: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV41]], [[UV43]], [[USUBO11]]
+    ; GFX9: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI2]]
+    ; GFX9: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]]
+    ; GFX9: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]]
+    ; GFX9: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]]
+    ; GFX9: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX9: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]]
+    ; GFX9: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
+    ; GFX9: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]]
+    ; GFX9: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
+    ; GFX9: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX9: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
+    ; GFX9: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH16]]
+    ; GFX9: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1)
+    ; GFX9: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX9: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]]
+    ; GFX9: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
+    ; GFX9: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]]
+    ; GFX9: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX9: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
+    ; GFX9: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH18]]
+    ; GFX9: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
+    ; GFX9: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
+    ; GFX9: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD21]]
+    ; GFX9: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
+    ; GFX9: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]]
+    ; GFX9: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]]
+    ; GFX9: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]]
+    ; GFX9: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]]
+    ; GFX9: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO53]]
+    ; GFX9: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]]
+    ; GFX9: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO52]]
+    ; GFX9: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]]
+    ; GFX9: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE14]]
+    ; GFX9: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO52]]
+    ; GFX9: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX9: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]]
+    ; GFX9: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE14]], [[MUL24]]
+    ; GFX9: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD27]]
+    ; GFX9: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[MUL24]]
+    ; GFX9: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX9: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
+    ; GFX9: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH21]]
+    ; GFX9: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1)
+    ; GFX9: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
+    ; GFX9: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE14]], [[ADD27]]
+    ; GFX9: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE14]], [[MUL24]]
+    ; GFX9: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD27]]
+    ; GFX9: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX9: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
+    ; GFX9: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH23]]
+    ; GFX9: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
+    ; GFX9: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
+    ; GFX9: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD28]]
+    ; GFX9: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
+    ; GFX9: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]]
+    ; GFX9: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE14]], [[ADD27]]
+    ; GFX9: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]]
+    ; GFX9: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]]
+    ; GFX9: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO53]]
+    ; GFX9: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[C6]], [[UADDO65]]
+    ; GFX9: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX9: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64)
+    ; GFX9: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO64]]
+    ; GFX9: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE18]]
+    ; GFX9: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO64]]
+    ; GFX9: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX9: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
+    ; GFX9: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH25]]
+    ; GFX9: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1)
+    ; GFX9: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
+    ; GFX9: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE18]]
+    ; GFX9: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO64]]
+    ; GFX9: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE18]]
+    ; GFX9: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX9: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
+    ; GFX9: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH27]]
+    ; GFX9: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1)
+    ; GFX9: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
+    ; GFX9: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD32]]
+    ; GFX9: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1)
+    ; GFX9: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]]
+    ; GFX9: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE18]]
+    ; GFX9: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]]
+    ; GFX9: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX9: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[UADDO74]]
+    ; GFX9: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV49]], [[UADDO74]]
+    ; GFX9: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[ADD35]]
+    ; GFX9: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV48]], [[UADDO74]]
+    ; GFX9: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX9: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]]
+    ; GFX9: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[MUL33]]
+    ; GFX9: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[ADD37]], [[USUBO13]]
+    ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[ADD37]]
+    ; GFX9: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32)
+    ; GFX9: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64)
+    ; GFX9: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV51]]
+    ; GFX9: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
+    ; GFX9: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV50]]
+    ; GFX9: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1)
+    ; GFX9: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV51]]
+    ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]]
+    ; GFX9: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV50]]
+    ; GFX9: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV51]], [[USUBO13]]
+    ; GFX9: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]]
+    ; GFX9: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE20]](s32)
+    ; GFX9: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV51]]
+    ; GFX9: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
+    ; GFX9: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV50]]
+    ; GFX9: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1)
+    ; GFX9: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV51]]
+    ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
+    ; GFX9: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV50]]
+    ; GFX9: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV51]], [[USUBO15]]
+    ; GFX9: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]]
+    ; GFX9: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO16]](s32), [[USUBE24]](s32)
+    ; GFX9: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]]
+    ; GFX9: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]]
+    ; GFX9: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]]
+    ; GFX9: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]]
+    ; GFX9: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[ASHR2]]
+    ; GFX9: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64)
+    ; GFX9: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64)
+    ; GFX9: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV52]], [[UV54]]
+    ; GFX9: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV53]], [[UV55]], [[USUBO19]]
+    ; GFX9: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
@@ -1153,36 +2602,519 @@ body: |
     ; GFX6-LABEL: name: test_srem_s33
     ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX6: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64)
-    ; GFX6: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64)
-    ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s33)
-    ; GFX6: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC1]](s33)
-    ; GFX6: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[SEXT]], [[SEXT1]]
-    ; GFX6: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[SREM]](s64)
-    ; GFX6: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33)
-    ; GFX6: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY2]], 33
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
+    ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY3]], 33
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
+    ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
+    ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
+    ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
+    ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64)
+    ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]]
+    ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX6: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32)
+    ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
+    ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]]
+    ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]]
+    ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]]
+    ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]]
+    ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]]
+    ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]]
+    ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]]
+    ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
+    ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]]
+    ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
+    ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]]
+    ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
+    ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]]
+    ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]]
+    ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]]
+    ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
+    ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
+    ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]]
+    ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
+    ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
+    ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]]
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]]
+    ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX6: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
+    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]]
+    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]]
+    ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]]
+    ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]]
+    ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]]
+    ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]]
+    ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]]
+    ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX6: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX6: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX6: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX6: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX6: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]]
+    ; GFX6: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]]
+    ; GFX6: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV5]](s64)
+    ; GFX6: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
     ; GFX8-LABEL: name: test_srem_s33
     ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX8: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64)
-    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64)
-    ; GFX8: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s33)
-    ; GFX8: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC1]](s33)
-    ; GFX8: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[SEXT]], [[SEXT1]]
-    ; GFX8: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[SREM]](s64)
-    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33)
-    ; GFX8: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY2]], 33
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
+    ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY3]], 33
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
+    ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
+    ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
+    ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
+    ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64)
+    ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]]
+    ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX8: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX8: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32)
+    ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
+    ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]]
+    ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]]
+    ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]]
+    ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]]
+    ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]]
+    ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]]
+    ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]]
+    ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
+    ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]]
+    ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
+    ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]]
+    ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
+    ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]]
+    ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX8: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]]
+    ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]]
+    ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
+    ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
+    ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]]
+    ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
+    ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
+    ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]]
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]]
+    ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX8: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
+    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]]
+    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]]
+    ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]]
+    ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]]
+    ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]]
+    ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]]
+    ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]]
+    ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX8: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX8: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX8: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX8: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX8: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]]
+    ; GFX8: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]]
+    ; GFX8: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV5]](s64)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
     ; GFX9-LABEL: name: test_srem_s33
     ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64)
-    ; GFX9: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s33)
-    ; GFX9: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC1]](s33)
-    ; GFX9: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[SEXT]], [[SEXT1]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[SREM]](s64)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY2]], 33
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
+    ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY3]], 33
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
+    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
+    ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
+    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+    ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64)
+    ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64)
+    ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+    ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]]
+    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+    ; GFX9: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
+    ; GFX9: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]]
+    ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32)
+    ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+    ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
+    ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
+    ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
+    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
+    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
+    ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
+    ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
+    ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
+    ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
+    ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
+    ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
+    ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]]
+    ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
+    ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
+    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]]
+    ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
+    ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]]
+    ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
+    ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
+    ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]]
+    ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]]
+    ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
+    ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]]
+    ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]]
+    ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
+    ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
+    ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]]
+    ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
+    ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
+    ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]]
+    ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
+    ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]]
+    ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX9: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
+    ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]]
+    ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]]
+    ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
+    ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
+    ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]]
+    ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
+    ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
+    ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]]
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]]
+    ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX9: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]]
+    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]]
+    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]]
+    ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]]
+    ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]]
+    ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]]
+    ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]]
+    ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]]
+    ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]]
+    ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]]
+    ; GFX9: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]]
+    ; GFX9: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64)
+    ; GFX9: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
+    ; GFX9: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]]
+    ; GFX9: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]]
+    ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32)
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV5]](s64)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s33) = G_TRUNC %0

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
new file mode 100644
index 000000000000..cc70c96c18c3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -0,0 +1,3783 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
+
+; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
+
+define i64 @v_sdiv_i64(i64 %num, i64 %den) {
+; CHECK-LABEL: v_sdiv_i64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_or_b32_e32 v5, v1, v3
+; CHECK-NEXT:    v_mov_b32_e32 v4, 0
+; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CHECK-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CHECK-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CHECK-NEXT:    s_cbranch_execz BB0_2
+; CHECK-NEXT:  ; %bb.1:
+; CHECK-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v2, v4
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v4, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v5, v5, v4
+; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v4
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v6, v5
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v7, v3
+; CHECK-NEXT:    v_ashrrev_i32_e32 v8, 31, v1
+; CHECK-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v7
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v0, v8
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v8, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v10, vcc, 0, v5
+; CHECK-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v6
+; CHECK-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v6
+; CHECK-NEXT:    v_trunc_f32_e32 v9, v9
+; CHECK-NEXT:    v_mac_f32_e32 v6, 0xcf800000, v9
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v9, v9
+; CHECK-NEXT:    v_subb_u32_e32 v11, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v7, v7, v8
+; CHECK-NEXT:    v_mul_lo_u32 v12, v11, v6
+; CHECK-NEXT:    v_mul_lo_u32 v13, v10, v9
+; CHECK-NEXT:    v_mul_hi_u32 v15, v10, v6
+; CHECK-NEXT:    v_mul_lo_u32 v14, v10, v6
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v8
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; CHECK-NEXT:    v_mul_lo_u32 v13, v9, v14
+; CHECK-NEXT:    v_mul_lo_u32 v15, v6, v12
+; CHECK-NEXT:    v_mul_hi_u32 v16, v6, v14
+; CHECK-NEXT:    v_mul_hi_u32 v14, v9, v14
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; CHECK-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v16, v9, v12
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
+; CHECK-NEXT:    v_mul_hi_u32 v15, v6, v12
+; CHECK-NEXT:    v_mul_hi_u32 v12, v9, v12
+; CHECK-NEXT:    v_add_i32_e32 v14, vcc, v16, v14
+; CHECK-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; CHECK-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v13
+; CHECK-NEXT:    v_addc_u32_e64 v13, s[4:5], v9, v12, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v11, v11, v6
+; CHECK-NEXT:    v_mul_lo_u32 v14, v10, v13
+; CHECK-NEXT:    v_mul_lo_u32 v15, v10, v6
+; CHECK-NEXT:    v_mul_hi_u32 v10, v10, v6
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
+; CHECK-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; CHECK-NEXT:    v_mul_hi_u32 v12, v6, v15
+; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
+; CHECK-NEXT:    v_mul_lo_u32 v11, v13, v15
+; CHECK-NEXT:    v_mul_lo_u32 v14, v6, v10
+; CHECK-NEXT:    v_mul_hi_u32 v15, v13, v15
+; CHECK-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; CHECK-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CHECK-NEXT:    v_mul_lo_u32 v12, v13, v10
+; CHECK-NEXT:    v_add_i32_e64 v11, s[4:5], v14, v11
+; CHECK-NEXT:    v_mul_hi_u32 v14, v6, v10
+; CHECK-NEXT:    v_mul_hi_u32 v10, v13, v10
+; CHECK-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v15
+; CHECK-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; CHECK-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v14, s[4:5], v15, v14
+; CHECK-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v12
+; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v12
+; CHECK-NEXT:    v_addc_u32_e32 v9, vcc, v9, v10, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
+; CHECK-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v10, v1, v6
+; CHECK-NEXT:    v_mul_lo_u32 v11, v7, v9
+; CHECK-NEXT:    v_mul_hi_u32 v12, v7, v6
+; CHECK-NEXT:    v_mul_hi_u32 v6, v1, v6
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v12, v1, v9
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_mul_hi_u32 v11, v7, v9
+; CHECK-NEXT:    v_mul_hi_u32 v9, v1, v9
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CHECK-NEXT:    v_mul_lo_u32 v10, v3, v6
+; CHECK-NEXT:    v_mul_lo_u32 v11, v5, v9
+; CHECK-NEXT:    v_mul_hi_u32 v13, v5, v6
+; CHECK-NEXT:    v_mul_lo_u32 v12, v5, v6
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
+; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v7, v12
+; CHECK-NEXT:    v_subb_u32_e64 v11, s[4:5], v1, v10, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v10
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v3
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v5
+; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v7, v5
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v11, v3
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, 1, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, v10, v12, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v12, vcc, 0, v9, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v13, v5, vcc
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v11
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, 0, v12, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v11, v3, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v12, v5, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v5, v8, v4
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v5
+; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v5
+; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v1, v5
+; CHECK-NEXT:    v_subb_u32_e32 v5, vcc, v3, v5, vcc
+; CHECK-NEXT:  BB0_2: ; %Flow
+; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
+; CHECK-NEXT:    s_xor_b64 exec, exec, s[6:7]
+; CHECK-NEXT:    s_cbranch_execz BB0_4
+; CHECK-NEXT:  ; %bb.3:
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v2
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v1
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_lo_u32 v3, v1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v2
+; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, 0, v3
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v1
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v1, v3
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT:    v_mul_lo_u32 v3, v1, v2
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v1
+; CHECK-NEXT:    v_subrev_i32_e32 v5, vcc, 1, v1
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, v0, v3
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v2
+; CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v1, v4, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v5, v0, vcc
+; CHECK-NEXT:    v_mov_b32_e32 v5, 0
+; CHECK-NEXT:  BB0_4:
+; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
+; CHECK-NEXT:    v_mov_b32_e32 v0, v4
+; CHECK-NEXT:    v_mov_b32_e32 v1, v5
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %result = sdiv i64 %num, %den
+  ret i64 %result
+}
+
+; FIXME: This is a workaround for not handling uniform VGPR case.
+declare i32 @llvm.amdgcn.readfirstlane(i32)
+
+define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
+; CHECK-LABEL: s_sdiv_i64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_or_b64 s[0:1], s[2:3], s[4:5]
+; CHECK-NEXT:    s_mov_b32 s6, 0
+; CHECK-NEXT:    s_mov_b32 s7, -1
+; CHECK-NEXT:    s_and_b64 s[0:1], s[0:1], s[6:7]
+; CHECK-NEXT:    v_cmp_eq_u64_e64 s[6:7], s[0:1], 0
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[8:9], 0, 1
+; CHECK-NEXT:    s_mov_b32 s0, 1
+; CHECK-NEXT:    s_xor_b64 vcc, s[6:7], s[8:9]
+; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; CHECK-NEXT:    s_cbranch_vccz BB1_2
+; CHECK-NEXT:  ; %bb.1:
+; CHECK-NEXT:    s_ashr_i32 s6, s3, 31
+; CHECK-NEXT:    s_ashr_i32 s8, s5, 31
+; CHECK-NEXT:    s_add_u32 s0, s2, s6
+; CHECK-NEXT:    s_cselect_b32 s1, 1, 0
+; CHECK-NEXT:    s_and_b32 s1, s1, 1
+; CHECK-NEXT:    s_cmp_lg_u32 s1, 0
+; CHECK-NEXT:    s_addc_u32 s1, s3, s6
+; CHECK-NEXT:    s_add_u32 s10, s4, s8
+; CHECK-NEXT:    s_cselect_b32 s3, 1, 0
+; CHECK-NEXT:    s_and_b32 s3, s3, 1
+; CHECK-NEXT:    s_cmp_lg_u32 s3, 0
+; CHECK-NEXT:    s_mov_b32 s9, s8
+; CHECK-NEXT:    s_addc_u32 s11, s5, s8
+; CHECK-NEXT:    s_xor_b64 s[10:11], s[10:11], s[8:9]
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s10
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, s11
+; CHECK-NEXT:    s_mov_b32 s7, s6
+; CHECK-NEXT:    s_xor_b64 s[12:13], s[0:1], s[6:7]
+; CHECK-NEXT:    s_sub_u32 s3, 0, s10
+; CHECK-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CHECK-NEXT:    s_cselect_b32 s0, 1, 0
+; CHECK-NEXT:    s_and_b32 s0, s0, 1
+; CHECK-NEXT:    s_cmp_lg_u32 s0, 0
+; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
+; CHECK-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
+; CHECK-NEXT:    v_trunc_f32_e32 v1, v1
+; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; CHECK-NEXT:    s_subb_u32 s5, 0, s11
+; CHECK-NEXT:    v_mul_lo_u32 v2, s5, v0
+; CHECK-NEXT:    v_mul_lo_u32 v3, s3, v1
+; CHECK-NEXT:    v_mul_hi_u32 v5, s3, v0
+; CHECK-NEXT:    v_mul_lo_u32 v4, s3, v0
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT:    v_mul_lo_u32 v3, v1, v4
+; CHECK-NEXT:    v_mul_lo_u32 v5, v0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v6, v0, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v4
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v6, v1, v2
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
+; CHECK-NEXT:    v_mul_hi_u32 v5, v0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
+; CHECK-NEXT:    v_addc_u32_e64 v3, s[0:1], v1, v2, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v4, s5, v0
+; CHECK-NEXT:    v_mul_lo_u32 v5, s3, v3
+; CHECK-NEXT:    v_mul_hi_u32 v7, s3, v0
+; CHECK-NEXT:    v_mul_lo_u32 v6, s3, v0
+; CHECK-NEXT:    v_add_i32_e64 v1, s[0:1], v1, v2
+; CHECK-NEXT:    v_add_i32_e64 v4, s[0:1], v4, v5
+; CHECK-NEXT:    v_add_i32_e64 v4, s[0:1], v4, v7
+; CHECK-NEXT:    v_mul_lo_u32 v5, v3, v6
+; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v4
+; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v6
+; CHECK-NEXT:    v_mul_hi_u32 v6, v3, v6
+; CHECK-NEXT:    v_add_i32_e64 v5, s[0:1], v5, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[0:1]
+; CHECK-NEXT:    v_add_i32_e64 v2, s[0:1], v5, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
+; CHECK-NEXT:    v_mul_lo_u32 v5, v3, v4
+; CHECK-NEXT:    v_add_i32_e64 v2, s[0:1], v7, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v4
+; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v4
+; CHECK-NEXT:    v_add_i32_e64 v5, s[0:1], v5, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[0:1]
+; CHECK-NEXT:    v_add_i32_e64 v5, s[0:1], v5, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[0:1]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[0:1], v6, v7
+; CHECK-NEXT:    v_add_i32_e64 v2, s[0:1], v5, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s[0:1]
+; CHECK-NEXT:    v_add_i32_e64 v4, s[0:1], v6, v5
+; CHECK-NEXT:    v_add_i32_e64 v3, s[0:1], v3, v4
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v2, s13, v0
+; CHECK-NEXT:    v_mul_lo_u32 v3, s12, v1
+; CHECK-NEXT:    v_mul_hi_u32 v4, s12, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, s13, v0
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v4, s13, v1
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_mul_hi_u32 v3, s12, v1
+; CHECK-NEXT:    v_mul_hi_u32 v1, s13, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v2, s11, v0
+; CHECK-NEXT:    v_mul_lo_u32 v1, s10, v1
+; CHECK-NEXT:    v_mul_hi_u32 v4, s10, v0
+; CHECK-NEXT:    v_mul_lo_u32 v3, s10, v0
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; CHECK-NEXT:    v_mov_b32_e32 v2, s13
+; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, s12, v3
+; CHECK-NEXT:    v_subb_u32_e64 v2, s[0:1], v2, v1, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[0:1], s13, v1
+; CHECK-NEXT:    v_mov_b32_e32 v4, s11
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v2
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v3
+; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, s10, v3
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[0:1]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v2
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s11, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, v5, v6, s[0:1]
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s10, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s11, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v5, v3, vcc
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v4
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; CHECK-NEXT:    s_xor_b64 s[0:1], s[6:7], s[8:9]
+; CHECK-NEXT:    v_xor_b32_e32 v0, s0, v0
+; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s0, v0
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:  BB1_2: ; %Flow
+; CHECK-NEXT:    s_and_b32 s0, s0, 1
+; CHECK-NEXT:    s_cmp_lg_u32 s0, 0
+; CHECK-NEXT:    s_cbranch_scc0 BB1_4
+; CHECK-NEXT:  ; %bb.3:
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s4
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CHECK-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CHECK-NEXT:    v_mul_lo_u32 v1, v0, s4
+; CHECK-NEXT:    v_mul_hi_u32 v2, v0, s4
+; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT:    v_add_i32_e64 v2, s[0:1], v0, v1
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[0:1], v0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v0, v0, s2
+; CHECK-NEXT:    v_mul_lo_u32 v1, v0, s4
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
+; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, 1, v0
+; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, s2, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, s2, v1
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s4, v4
+; CHECK-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
+; CHECK-NEXT:  BB1_4:
+; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
+; CHECK-NEXT:    s_mov_b32 s1, s0
+; CHECK-NEXT:    ; return to shader part epilog
+  %result = sdiv i64 %num, %den
+  %cast = bitcast i64 %result to <2 x i32>
+  %elt.0 = extractelement <2 x i32> %cast, i32 0
+  %elt.1 = extractelement <2 x i32> %cast, i32 1
+  %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0)
+  %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1)
+  %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0
+  %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1
+  %cast.back = bitcast <2 x i32> %ins.1 to i64
+  ret i64 %cast.back
+}
+
+define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
+; GISEL-LABEL: v_sdiv_v2i64:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_ashrrev_i32_e32 v8, 31, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v4, v4, v8
+; GISEL-NEXT:    v_xor_b32_e32 v5, v5, v8
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v4
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v10, v5
+; GISEL-NEXT:    v_ashrrev_i32_e32 v11, 31, v1
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v11
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v11, vcc
+; GISEL-NEXT:    v_mac_f32_e32 v9, 0x4f800000, v10
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v9, v9
+; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, 0, v4
+; GISEL-NEXT:    v_subb_u32_e32 v13, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v11
+; GISEL-NEXT:    v_mul_f32_e32 v9, 0x5f7ffffc, v9
+; GISEL-NEXT:    v_mul_f32_e32 v10, 0x2f800000, v9
+; GISEL-NEXT:    v_trunc_f32_e32 v10, v10
+; GISEL-NEXT:    v_mac_f32_e32 v9, 0xcf800000, v10
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v10
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v11
+; GISEL-NEXT:    v_mul_lo_u32 v14, v13, v9
+; GISEL-NEXT:    v_mul_lo_u32 v15, v12, v10
+; GISEL-NEXT:    v_mul_hi_u32 v17, v12, v9
+; GISEL-NEXT:    v_mul_lo_u32 v16, v12, v9
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; GISEL-NEXT:    v_mul_lo_u32 v15, v10, v16
+; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v14
+; GISEL-NEXT:    v_mul_hi_u32 v18, v9, v16
+; GISEL-NEXT:    v_mul_hi_u32 v16, v10, v16
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v18
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v14
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v17, v15
+; GISEL-NEXT:    v_mul_hi_u32 v17, v9, v14
+; GISEL-NEXT:    v_mul_hi_u32 v14, v10, v14
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v18, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v16, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v18, v17
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v17, v16
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; GISEL-NEXT:    v_addc_u32_e64 v15, s[4:5], v10, v14, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v13, v13, v9
+; GISEL-NEXT:    v_mul_lo_u32 v16, v12, v15
+; GISEL-NEXT:    v_mul_lo_u32 v17, v12, v9
+; GISEL-NEXT:    v_mul_hi_u32 v12, v12, v9
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v14
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v16
+; GISEL-NEXT:    v_mul_hi_u32 v14, v9, v17
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
+; GISEL-NEXT:    v_mul_lo_u32 v13, v15, v17
+; GISEL-NEXT:    v_mul_lo_u32 v16, v9, v12
+; GISEL-NEXT:    v_mul_hi_u32 v17, v15, v17
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v14, v15, v12
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v16, v13
+; GISEL-NEXT:    v_mul_hi_u32 v16, v9, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v15, v12
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v17, v16
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v16, v14
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, v10, v12, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v13
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v10, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v12, v1, v9
+; GISEL-NEXT:    v_mul_lo_u32 v13, v0, v10
+; GISEL-NEXT:    v_mul_hi_u32 v14, v0, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v14, v1, v10
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; GISEL-NEXT:    v_mul_hi_u32 v13, v0, v10
+; GISEL-NEXT:    v_mul_hi_u32 v10, v1, v10
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v9
+; GISEL-NEXT:    v_mul_lo_u32 v13, v4, v10
+; GISEL-NEXT:    v_mul_hi_u32 v15, v4, v9
+; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v9
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v14
+; GISEL-NEXT:    v_subb_u32_e64 v13, s[4:5], v1, v12, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v12
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v13, v5
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v13, v5
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, v12, v14, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v14, vcc, 0, v10, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v15, v0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v13
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, 0, v14, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v13, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v14, v4, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
+; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v10, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v9, v0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v5
+; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v7, v5, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v7, v7, v5
+; GISEL-NEXT:    v_xor_b32_e32 v6, v6, v5
+; GISEL-NEXT:    v_xor_b32_e32 v4, v11, v8
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v7
+; GISEL-NEXT:    v_ashrrev_i32_e32 v10, 31, v3
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v10, vcc
+; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v9
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
+; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, 0, v6
+; GISEL-NEXT:    v_subb_u32_e32 v12, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GISEL-NEXT:    v_mul_f32_e32 v8, 0x5f7ffffc, v8
+; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v8
+; GISEL-NEXT:    v_trunc_f32_e32 v9, v9
+; GISEL-NEXT:    v_mac_f32_e32 v8, 0xcf800000, v9
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v10
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v10
+; GISEL-NEXT:    v_mul_lo_u32 v13, v12, v8
+; GISEL-NEXT:    v_mul_lo_u32 v14, v11, v9
+; GISEL-NEXT:    v_mul_hi_u32 v16, v11, v8
+; GISEL-NEXT:    v_mul_lo_u32 v15, v11, v8
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v4
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
+; GISEL-NEXT:    v_mul_lo_u32 v14, v9, v15
+; GISEL-NEXT:    v_mul_lo_u32 v16, v8, v13
+; GISEL-NEXT:    v_mul_hi_u32 v17, v8, v15
+; GISEL-NEXT:    v_mul_hi_u32 v15, v9, v15
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v13
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v16, v14
+; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v13
+; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v13
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v17, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v17, v16
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v14
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[4:5], v9, v13, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v12, v12, v8
+; GISEL-NEXT:    v_mul_lo_u32 v15, v11, v14
+; GISEL-NEXT:    v_mul_lo_u32 v16, v11, v8
+; GISEL-NEXT:    v_mul_hi_u32 v11, v11, v8
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v15
+; GISEL-NEXT:    v_mul_hi_u32 v13, v8, v16
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; GISEL-NEXT:    v_mul_lo_u32 v12, v14, v16
+; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v11
+; GISEL-NEXT:    v_mul_hi_u32 v16, v14, v16
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v13, v14, v11
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v15, v12
+; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v11
+; GISEL-NEXT:    v_mul_hi_u32 v11, v14, v11
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v16, v15
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v15, v13
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v11, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, v3, v8
+; GISEL-NEXT:    v_mul_lo_u32 v12, v2, v9
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v4, v2, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v3, v8
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v11, v3, v9
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
+; GISEL-NEXT:    v_mul_hi_u32 v12, v2, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v3, v9
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_mul_lo_u32 v9, v7, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, v6, v8
+; GISEL-NEXT:    v_mul_hi_u32 v13, v6, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, v6, v4
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v13
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v12
+; GISEL-NEXT:    v_subb_u32_e64 v11, s[4:5], v3, v9, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v9
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v7
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v6
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v11, v7
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, v12, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, 0, v8, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v13, v2, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v11
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v12, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v11, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v12, v6, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v4, v10, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v8, v3, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v4
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v4
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v4, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_sdiv_v2i64:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    v_mov_b32_e32 v9, v1
+; CGP-NEXT:    v_mov_b32_e32 v8, v0
+; CGP-NEXT:    v_or_b32_e32 v1, v9, v5
+; CGP-NEXT:    v_mov_b32_e32 v0, 0
+; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execz BB2_2
+; CGP-NEXT:  ; %bb.1:
+; CGP-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v4, v0
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v0, vcc
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v0
+; CGP-NEXT:    v_xor_b32_e32 v5, v5, v0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v10, v1
+; CGP-NEXT:    v_cvt_f32_u32_e32 v11, v5
+; CGP-NEXT:    v_ashrrev_i32_e32 v12, 31, v9
+; CGP-NEXT:    v_mac_f32_e32 v10, 0x4f800000, v11
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v10, v10
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v8, v12
+; CGP-NEXT:    v_addc_u32_e32 v9, vcc, v9, v12, vcc
+; CGP-NEXT:    v_sub_i32_e32 v14, vcc, 0, v1
+; CGP-NEXT:    v_mul_f32_e32 v10, 0x5f7ffffc, v10
+; CGP-NEXT:    v_mul_f32_e32 v13, 0x2f800000, v10
+; CGP-NEXT:    v_trunc_f32_e32 v13, v13
+; CGP-NEXT:    v_mac_f32_e32 v10, 0xcf800000, v13
+; CGP-NEXT:    v_cvt_u32_f32_e32 v10, v10
+; CGP-NEXT:    v_cvt_u32_f32_e32 v13, v13
+; CGP-NEXT:    v_subb_u32_e32 v15, vcc, 0, v5, vcc
+; CGP-NEXT:    v_xor_b32_e32 v11, v11, v12
+; CGP-NEXT:    v_mul_lo_u32 v16, v15, v10
+; CGP-NEXT:    v_mul_lo_u32 v17, v14, v13
+; CGP-NEXT:    v_mul_hi_u32 v19, v14, v10
+; CGP-NEXT:    v_mul_lo_u32 v18, v14, v10
+; CGP-NEXT:    v_xor_b32_e32 v9, v9, v12
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v16, v17
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v16, v19
+; CGP-NEXT:    v_mul_lo_u32 v17, v13, v18
+; CGP-NEXT:    v_mul_lo_u32 v19, v10, v16
+; CGP-NEXT:    v_mul_hi_u32 v20, v10, v18
+; CGP-NEXT:    v_mul_hi_u32 v18, v13, v18
+; CGP-NEXT:    v_add_i32_e32 v17, vcc, v17, v19
+; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v17, vcc, v17, v20
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v20, v13, v16
+; CGP-NEXT:    v_add_i32_e32 v17, vcc, v19, v17
+; CGP-NEXT:    v_mul_hi_u32 v19, v10, v16
+; CGP-NEXT:    v_mul_hi_u32 v16, v13, v16
+; CGP-NEXT:    v_add_i32_e32 v18, vcc, v20, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v18, vcc, v18, v19
+; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v19, vcc, v20, v19
+; CGP-NEXT:    v_add_i32_e32 v17, vcc, v18, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v18, vcc, v19, v18
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v16, v18
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v17
+; CGP-NEXT:    v_addc_u32_e64 v17, s[4:5], v13, v16, vcc
+; CGP-NEXT:    v_mul_lo_u32 v15, v15, v10
+; CGP-NEXT:    v_mul_lo_u32 v18, v14, v17
+; CGP-NEXT:    v_mul_lo_u32 v19, v14, v10
+; CGP-NEXT:    v_mul_hi_u32 v14, v14, v10
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v16
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v18
+; CGP-NEXT:    v_mul_hi_u32 v16, v10, v19
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v15, v14
+; CGP-NEXT:    v_mul_lo_u32 v15, v17, v19
+; CGP-NEXT:    v_mul_lo_u32 v18, v10, v14
+; CGP-NEXT:    v_mul_hi_u32 v19, v17, v19
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v16, v17, v14
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v18, v15
+; CGP-NEXT:    v_mul_hi_u32 v18, v10, v14
+; CGP-NEXT:    v_mul_hi_u32 v14, v17, v14
+; CGP-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v19
+; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v18, s[4:5], v19, v18
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v16, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v16, s[4:5], v18, v16
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v16
+; CGP-NEXT:    v_addc_u32_e32 v13, vcc, v13, v14, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
+; CGP-NEXT:    v_addc_u32_e32 v13, vcc, 0, v13, vcc
+; CGP-NEXT:    v_mul_lo_u32 v14, v9, v10
+; CGP-NEXT:    v_mul_lo_u32 v15, v11, v13
+; CGP-NEXT:    v_mul_hi_u32 v16, v11, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v9, v10
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v16, v9, v13
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CGP-NEXT:    v_mul_hi_u32 v15, v11, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v9, v13
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v16, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; CGP-NEXT:    v_mul_lo_u32 v14, v5, v10
+; CGP-NEXT:    v_mul_lo_u32 v15, v1, v13
+; CGP-NEXT:    v_mul_hi_u32 v17, v1, v10
+; CGP-NEXT:    v_mul_lo_u32 v16, v1, v10
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; CGP-NEXT:    v_sub_i32_e32 v11, vcc, v11, v16
+; CGP-NEXT:    v_subb_u32_e64 v15, s[4:5], v9, v14, vcc
+; CGP-NEXT:    v_sub_i32_e64 v9, s[4:5], v9, v14
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v15, v5
+; CGP-NEXT:    v_subb_u32_e32 v9, vcc, v9, v5, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v1
+; CGP-NEXT:    v_sub_i32_e32 v11, vcc, v11, v1
+; CGP-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v9, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v15, v5
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, 1, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v14, v14, v16, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v16, vcc, 0, v13, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, -1, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v1
+; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v17, v1, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v15
+; CGP-NEXT:    v_addc_u32_e32 v9, vcc, 0, v16, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v15, v5, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v16, v9, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v14
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v10, v1, vcc
+; CGP-NEXT:    v_xor_b32_e32 v9, v12, v0
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v13, v5, vcc
+; CGP-NEXT:    v_xor_b32_e32 v0, v1, v9
+; CGP-NEXT:    v_xor_b32_e32 v1, v5, v9
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v9
+; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v9, vcc
+; CGP-NEXT:  BB2_2: ; %Flow2
+; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
+; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execz BB2_4
+; CGP-NEXT:  ; %bb.3:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
+; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_lo_u32 v1, v0, v4
+; CGP-NEXT:    v_mul_hi_u32 v5, v0, v4
+; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v1
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; CGP-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v0, v1
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; CGP-NEXT:    v_mul_hi_u32 v0, v0, v8
+; CGP-NEXT:    v_mul_lo_u32 v1, v0, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v0
+; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, 1, v0
+; CGP-NEXT:    v_sub_i32_e32 v10, vcc, v8, v1
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v1
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v4
+; CGP-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v0, v5, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v9, v0, vcc
+; CGP-NEXT:    v_mov_b32_e32 v1, 0
+; CGP-NEXT:  BB2_4:
+; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
+; CGP-NEXT:    v_or_b32_e32 v5, v3, v7
+; CGP-NEXT:    v_mov_b32_e32 v4, 0
+; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execz BB2_6
+; CGP-NEXT:  ; %bb.5:
+; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v7
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v4
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, v7, v4, vcc
+; CGP-NEXT:    v_xor_b32_e32 v5, v5, v4
+; CGP-NEXT:    v_xor_b32_e32 v7, v7, v4
+; CGP-NEXT:    v_cvt_f32_u32_e32 v8, v5
+; CGP-NEXT:    v_cvt_f32_u32_e32 v9, v7
+; CGP-NEXT:    v_ashrrev_i32_e32 v10, 31, v3
+; CGP-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v9
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v8, v8
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v2, v10
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v10, vcc
+; CGP-NEXT:    v_sub_i32_e32 v12, vcc, 0, v5
+; CGP-NEXT:    v_mul_f32_e32 v8, 0x5f7ffffc, v8
+; CGP-NEXT:    v_mul_f32_e32 v11, 0x2f800000, v8
+; CGP-NEXT:    v_trunc_f32_e32 v11, v11
+; CGP-NEXT:    v_mac_f32_e32 v8, 0xcf800000, v11
+; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; CGP-NEXT:    v_cvt_u32_f32_e32 v11, v11
+; CGP-NEXT:    v_subb_u32_e32 v13, vcc, 0, v7, vcc
+; CGP-NEXT:    v_xor_b32_e32 v9, v9, v10
+; CGP-NEXT:    v_mul_lo_u32 v14, v13, v8
+; CGP-NEXT:    v_mul_lo_u32 v15, v12, v11
+; CGP-NEXT:    v_mul_hi_u32 v17, v12, v8
+; CGP-NEXT:    v_mul_lo_u32 v16, v12, v8
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v10
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; CGP-NEXT:    v_mul_lo_u32 v15, v11, v16
+; CGP-NEXT:    v_mul_lo_u32 v17, v8, v14
+; CGP-NEXT:    v_mul_hi_u32 v18, v8, v16
+; CGP-NEXT:    v_mul_hi_u32 v16, v11, v16
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v15, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v15, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v18, v11, v14
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v17, v15
+; CGP-NEXT:    v_mul_hi_u32 v17, v8, v14
+; CGP-NEXT:    v_mul_hi_u32 v14, v11, v14
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v18, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v16, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v17, vcc, v18, v17
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v17, v16
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v15
+; CGP-NEXT:    v_addc_u32_e64 v15, s[4:5], v11, v14, vcc
+; CGP-NEXT:    v_mul_lo_u32 v13, v13, v8
+; CGP-NEXT:    v_mul_lo_u32 v16, v12, v15
+; CGP-NEXT:    v_mul_lo_u32 v17, v12, v8
+; CGP-NEXT:    v_mul_hi_u32 v12, v12, v8
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v16
+; CGP-NEXT:    v_mul_hi_u32 v14, v8, v17
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
+; CGP-NEXT:    v_mul_lo_u32 v13, v15, v17
+; CGP-NEXT:    v_mul_lo_u32 v16, v8, v12
+; CGP-NEXT:    v_mul_hi_u32 v17, v15, v17
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v14, v15, v12
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v16, v13
+; CGP-NEXT:    v_mul_hi_u32 v16, v8, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v15, v12
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v16, s[4:5], v17, v16
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v16, v14
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; CGP-NEXT:    v_addc_u32_e32 v11, vcc, v11, v12, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v13
+; CGP-NEXT:    v_addc_u32_e32 v11, vcc, 0, v11, vcc
+; CGP-NEXT:    v_mul_lo_u32 v12, v3, v8
+; CGP-NEXT:    v_mul_lo_u32 v13, v9, v11
+; CGP-NEXT:    v_mul_hi_u32 v14, v9, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v3, v8
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v14, v3, v11
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_mul_hi_u32 v13, v9, v11
+; CGP-NEXT:    v_mul_hi_u32 v11, v3, v11
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v14, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; CGP-NEXT:    v_mul_lo_u32 v12, v7, v8
+; CGP-NEXT:    v_mul_lo_u32 v13, v5, v11
+; CGP-NEXT:    v_mul_hi_u32 v15, v5, v8
+; CGP-NEXT:    v_mul_lo_u32 v14, v5, v8
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; CGP-NEXT:    v_sub_i32_e32 v9, vcc, v9, v14
+; CGP-NEXT:    v_subb_u32_e64 v13, s[4:5], v3, v12, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v12
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v13, v7
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v5
+; CGP-NEXT:    v_sub_i32_e32 v9, vcc, v9, v5
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v13, v7
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, 1, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v12, v12, v14, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v14, vcc, 0, v11, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v15, v5, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v13
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, 0, v14, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v13, v5, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v14, v7, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v8, v3, vcc
+; CGP-NEXT:    v_xor_b32_e32 v7, v10, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v11, v5, vcc
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
+; CGP-NEXT:    v_xor_b32_e32 v5, v5, v7
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v3, v7
+; CGP-NEXT:    v_subb_u32_e32 v5, vcc, v5, v7, vcc
+; CGP-NEXT:  BB2_6: ; %Flow
+; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
+; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execz BB2_8
+; CGP-NEXT:  ; %bb.7:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v6
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_lo_u32 v4, v3, v6
+; CGP-NEXT:    v_mul_hi_u32 v5, v3, v6
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v4
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
+; CGP-NEXT:    v_mul_hi_u32 v4, v4, v3
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CGP-NEXT:    v_mul_hi_u32 v3, v3, v2
+; CGP-NEXT:    v_mul_lo_u32 v4, v3, v6
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
+; CGP-NEXT:    v_subrev_i32_e32 v7, vcc, 1, v3
+; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v2, v4
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v6
+; CGP-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v2, v3, v5, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v7, v2, vcc
+; CGP-NEXT:    v_mov_b32_e32 v5, 0
+; CGP-NEXT:  BB2_8:
+; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
+; CGP-NEXT:    v_mov_b32_e32 v2, v4
+; CGP-NEXT:    v_mov_b32_e32 v3, v5
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %result = sdiv <2 x i64> %num, %den
+  ret <2 x i64> %result
+}
+
+define i64 @v_sdiv_i64_pow2k_denom(i64 %num) {
+; NOTE(review): the CHECK lines below are auto-generated FileCheck assertions
+; (update_llc_test_checks.py style); do not hand-edit them — regenerate with
+; the script if codegen changes.
+; Expected output is the full reciprocal-based i64 sdiv expansion: sign is
+; stripped up front (ashr/add/addc/xor with v4), a v_rcp_iflag_f32 estimate is
+; iteratively refined via the mul_lo/mul_hi carry chains, the quotient gets a
+; +1/+2 fixup selected by the remainder compares, and the sign is restored at
+; the end (xor/sub/subb with v4). Notably this runs even though the divisor
+; 4096 is a power of two — presumably the pow2 shift fold is not applied on
+; this path; TODO confirm.
+; CHECK-LABEL: v_sdiv_i64_pow2k_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_movk_i32 s6, 0x1000
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s6
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, 0
+; CHECK-NEXT:    s_mov_b32 s7, 0xfffff000
+; CHECK-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v4
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
+; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
+; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT:    v_mul_lo_u32 v5, -1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v3
+; CHECK-NEXT:    v_mul_hi_u32 v8, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v7, s7, v2
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v7
+; CHECK-NEXT:    v_mul_lo_u32 v8, v2, v5
+; CHECK-NEXT:    v_mul_hi_u32 v9, v2, v7
+; CHECK-NEXT:    v_mul_hi_u32 v7, v3, v7
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v9, v3, v5
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v5
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; CHECK-NEXT:    v_addc_u32_e64 v6, s[4:5], v3, v5, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v7, -1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, s7, v6
+; CHECK-NEXT:    v_mul_hi_u32 v10, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v9, s7, v2
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v5
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v10
+; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v9
+; CHECK-NEXT:    v_mul_lo_u32 v10, v2, v7
+; CHECK-NEXT:    v_mul_hi_u32 v5, v2, v9
+; CHECK-NEXT:    v_mul_hi_u32 v9, v6, v9
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v8, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s[4:5]
+; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v7
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v10, v5
+; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v7
+; CHECK-NEXT:    v_mul_hi_u32 v6, v6, v7
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v8, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v8
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v6, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v5, v1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, v0, v3
+; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v7, v1, v3
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; CHECK-NEXT:    v_mul_hi_u32 v6, v0, v3
+; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; CHECK-NEXT:    v_mul_lo_u32 v5, 0, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, s6, v3
+; CHECK-NEXT:    v_mul_hi_u32 v8, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v7, s6, v2
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
+; CHECK-NEXT:    v_subb_u32_e64 v6, s[4:5], v1, v5, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v5
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v6
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
+; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s6, v0
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v6
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, 1, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, v5, v7, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, 1, v6
+; CHECK-NEXT:    v_addc_u32_e32 v8, vcc, 0, v7, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v6, v1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v4
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %result = sdiv i64 %num, 4096
+  ret i64 %result
+}
+
+define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
+; GISEL-LABEL: v_sdiv_v2i64_pow2k_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_movk_i32 s10, 0x1000
+; GISEL-NEXT:    s_add_u32 s4, s10, 0
+; GISEL-NEXT:    s_cselect_b32 s5, 1, 0
+; GISEL-NEXT:    s_and_b32 s5, s5, 1
+; GISEL-NEXT:    s_cmp_lg_u32 s5, 0
+; GISEL-NEXT:    s_mov_b32 s6, 0
+; GISEL-NEXT:    s_mov_b32 s7, s6
+; GISEL-NEXT:    s_addc_u32 s5, 0, 0
+; GISEL-NEXT:    s_xor_b64 s[8:9], s[4:5], s[6:7]
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s8
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, s9
+; GISEL-NEXT:    s_sub_u32 s11, 0, s8
+; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
+; GISEL-NEXT:    s_and_b32 s4, s4, 1
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
+; GISEL-NEXT:    s_subb_u32 s12, 0, s9
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, s12, v4
+; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v5
+; GISEL-NEXT:    v_mul_hi_u32 v10, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, s11, v4
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v6
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT:    v_mul_lo_u32 v8, v5, v9
+; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v7
+; GISEL-NEXT:    v_mul_hi_u32 v11, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v7
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; GISEL-NEXT:    v_mul_hi_u32 v10, v4, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v5, v7
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], v5, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, s12, v4
+; GISEL-NEXT:    v_mul_lo_u32 v10, s11, v8
+; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, s11, v4
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v7
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v11
+; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v11
+; GISEL-NEXT:    v_mul_hi_u32 v11, v8, v11
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v10, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v9
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v12, v7
+; GISEL-NEXT:    v_mul_hi_u32 v12, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v9
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v10, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v10
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, v1, v4
+; GISEL-NEXT:    v_mul_lo_u32 v8, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v9, v0, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v5
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; GISEL-NEXT:    v_mul_hi_u32 v8, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT:    v_mul_lo_u32 v7, s9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v8, s8, v5
+; GISEL-NEXT:    v_mul_hi_u32 v10, s8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v4
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v9
+; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], v1, v7, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v7
+; GISEL-NEXT:    v_mov_b32_e32 v7, s9
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s9, v8
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v7, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s8, v0
+; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, s8, v0
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s9, v8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v9, v10, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s9, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
+; GISEL-NEXT:    s_add_u32 s4, s10, 0
+; GISEL-NEXT:    s_cselect_b32 s5, 1, 0
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s9, v1
+; GISEL-NEXT:    s_and_b32 s5, s5, 1
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v7
+; GISEL-NEXT:    s_cmp_lg_u32 s5, 0
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v9, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    s_addc_u32 s5, 0, 0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v7, v1, vcc
+; GISEL-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v10, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, s7
+; GISEL-NEXT:    s_sub_u32 s8, 0, s6
+; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
+; GISEL-NEXT:    s_and_b32 s4, s4, 1
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
+; GISEL-NEXT:    s_subb_u32 s9, 0, s7
+; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, s9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v5
+; GISEL-NEXT:    v_mul_hi_u32 v11, s8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v10, s8, v4
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v6
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; GISEL-NEXT:    v_mul_lo_u32 v9, v5, v10
+; GISEL-NEXT:    v_mul_lo_u32 v11, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v12, v4, v10
+; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v10
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v7
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v8
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_mul_hi_u32 v11, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v5, v8
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], v5, v8, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v10, s9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, s8, v9
+; GISEL-NEXT:    v_mul_hi_u32 v13, s8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, s8, v4
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v8
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
+; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v13, v4, v10
+; GISEL-NEXT:    v_mul_hi_u32 v8, v4, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v10
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v13, v8
+; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v10
+; GISEL-NEXT:    v_mul_hi_u32 v9, v9, v10
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v11
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v9, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v5
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v6, v2, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v5
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
+; GISEL-NEXT:    v_mul_hi_u32 v9, v2, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT:    v_mul_lo_u32 v6, s7, v4
+; GISEL-NEXT:    v_mul_lo_u32 v8, s6, v5
+; GISEL-NEXT:    v_mul_hi_u32 v10, s6, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, s6, v4
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
+; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], v3, v6, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v6
+; GISEL-NEXT:    v_mov_b32_e32 v6, s7
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v8
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v2
+; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, s6, v2
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s7, v8
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v9, v10, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s7, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s6, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s7, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v6
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v9, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v6, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v10, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v7
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v7
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_sdiv_v2i64_pow2k_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_movk_i32 s6, 0x1000
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, s6
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, 0
+; CGP-NEXT:    s_mov_b32 s7, 0xfffff000
+; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; CGP-NEXT:    v_mov_b32_e32 v7, v4
+; CGP-NEXT:    v_mac_f32_e32 v7, 0x4f800000, v5
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v6, vcc
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v6
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x5f7ffffc, v7
+; CGP-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v7
+; CGP-NEXT:    v_trunc_f32_e32 v8, v8
+; CGP-NEXT:    v_mac_f32_e32 v7, 0xcf800000, v8
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
+; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; CGP-NEXT:    v_mul_lo_u32 v9, -1, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, s7, v8
+; CGP-NEXT:    v_mul_hi_u32 v12, s7, v7
+; CGP-NEXT:    v_mul_lo_u32 v11, s7, v7
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; CGP-NEXT:    v_mul_lo_u32 v10, v8, v11
+; CGP-NEXT:    v_mul_lo_u32 v12, v7, v9
+; CGP-NEXT:    v_mul_hi_u32 v13, v7, v11
+; CGP-NEXT:    v_mul_hi_u32 v11, v8, v11
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v13, v8, v9
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; CGP-NEXT:    v_mul_hi_u32 v12, v7, v9
+; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v8, v9, vcc
+; CGP-NEXT:    v_mul_lo_u32 v11, -1, v7
+; CGP-NEXT:    v_mul_lo_u32 v12, s7, v10
+; CGP-NEXT:    v_mul_hi_u32 v14, s7, v7
+; CGP-NEXT:    v_mul_lo_u32 v13, s7, v7
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; CGP-NEXT:    v_mul_lo_u32 v12, v10, v13
+; CGP-NEXT:    v_mul_lo_u32 v14, v7, v11
+; CGP-NEXT:    v_mul_hi_u32 v9, v7, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v10, v13
+; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v3
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v12, v10, v11
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v14, v9
+; CGP-NEXT:    v_mul_hi_u32 v14, v7, v11
+; CGP-NEXT:    v_mul_hi_u32 v10, v10, v11
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v14
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v12
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v8, v10, vcc
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, 0, v8, vcc
+; CGP-NEXT:    v_mul_lo_u32 v9, v1, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, v0, v8
+; CGP-NEXT:    v_mul_hi_u32 v11, v0, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v11, v1, v8
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT:    v_mul_hi_u32 v10, v0, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v1, v8
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, s6, v8
+; CGP-NEXT:    v_mul_hi_u32 v12, s6, v7
+; CGP-NEXT:    v_mul_lo_u32 v11, s6, v7
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v11
+; CGP-NEXT:    v_subb_u32_e64 v10, s[4:5], v1, v9, vcc
+; CGP-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v9
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v10
+; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
+; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s6, v0
+; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v10
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v11, vcc, 0, v8, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v12, v0, vcc
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, 1, v10
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, 0, v11, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v10, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v11, v12, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v7, v0, vcc
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v4
+; CGP-NEXT:    v_trunc_f32_e32 v7, v7
+; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CGP-NEXT:    v_mul_lo_u32 v8, -1, v4
+; CGP-NEXT:    v_mul_lo_u32 v9, s7, v7
+; CGP-NEXT:    v_mul_hi_u32 v11, s7, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, s7, v4
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT:    v_mul_lo_u32 v9, v7, v10
+; CGP-NEXT:    v_mul_lo_u32 v11, v4, v8
+; CGP-NEXT:    v_mul_hi_u32 v12, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v7, v10
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v6
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v12, v7, v8
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT:    v_mul_hi_u32 v11, v4, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v7, v8
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; CGP-NEXT:    v_addc_u32_e64 v9, s[4:5], v7, v8, vcc
+; CGP-NEXT:    v_mul_lo_u32 v10, -1, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, s7, v9
+; CGP-NEXT:    v_mul_hi_u32 v13, s7, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, s7, v4
+; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
+; CGP-NEXT:    v_mul_lo_u32 v11, v9, v12
+; CGP-NEXT:    v_mul_lo_u32 v13, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v8, v4, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v9, v12
+; CGP-NEXT:    v_xor_b32_e32 v2, v2, v5
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v11, v9, v10
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v13, v8
+; CGP-NEXT:    v_mul_hi_u32 v13, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v9, v9, v10
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v11
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, v7, v9, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v5
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; CGP-NEXT:    v_mul_lo_u32 v8, v3, v4
+; CGP-NEXT:    v_mul_lo_u32 v9, v2, v7
+; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; CGP-NEXT:    v_mul_hi_u32 v6, v2, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v8, v3, v7
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
+; CGP-NEXT:    v_mul_hi_u32 v9, v2, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, v3, v7
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CGP-NEXT:    v_mul_lo_u32 v7, 0, v4
+; CGP-NEXT:    v_mul_lo_u32 v8, s6, v6
+; CGP-NEXT:    v_mul_hi_u32 v10, s6, v4
+; CGP-NEXT:    v_mul_lo_u32 v9, s6, v4
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
+; CGP-NEXT:    v_subb_u32_e64 v8, s[4:5], v3, v7, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v7
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v8
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v2
+; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, s6, v2
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v7, v7, v9, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v9, vcc, 0, v6, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v2
+; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v8
+; CGP-NEXT:    v_addc_u32_e32 v10, vcc, 0, v9, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v8, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v9, v10, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; CGP-NEXT:    v_xor_b32_e32 v2, v2, v5
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v5
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v5, vcc
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %result = sdiv <2 x i64> %num, <i64 4096, i64 4096>
+  ret <2 x i64> %result
+}
+
+define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
+; CHECK-LABEL: v_sdiv_i64_oddk_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s6
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, 0
+; CHECK-NEXT:    s_mov_b32 s7, 0xffed2705
+; CHECK-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v4
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
+; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
+; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT:    v_mul_lo_u32 v5, -1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v3
+; CHECK-NEXT:    v_mul_hi_u32 v8, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v7, s7, v2
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v7
+; CHECK-NEXT:    v_mul_lo_u32 v8, v2, v5
+; CHECK-NEXT:    v_mul_hi_u32 v9, v2, v7
+; CHECK-NEXT:    v_mul_hi_u32 v7, v3, v7
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v9, v3, v5
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v5
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; CHECK-NEXT:    v_addc_u32_e64 v6, s[4:5], v3, v5, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v7, -1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, s7, v6
+; CHECK-NEXT:    v_mul_hi_u32 v10, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v9, s7, v2
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v5
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v10
+; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v9
+; CHECK-NEXT:    v_mul_lo_u32 v10, v2, v7
+; CHECK-NEXT:    v_mul_hi_u32 v5, v2, v9
+; CHECK-NEXT:    v_mul_hi_u32 v9, v6, v9
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v8, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s[4:5]
+; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v7
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v10, v5
+; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v7
+; CHECK-NEXT:    v_mul_hi_u32 v6, v6, v7
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v8, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v8
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v6, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v5, v1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, v0, v3
+; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v7, v1, v3
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; CHECK-NEXT:    v_mul_hi_u32 v6, v0, v3
+; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; CHECK-NEXT:    v_mul_lo_u32 v5, 0, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, s6, v3
+; CHECK-NEXT:    v_mul_hi_u32 v8, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v7, s6, v2
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
+; CHECK-NEXT:    v_subb_u32_e64 v6, s[4:5], v1, v5, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v5
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v6
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
+; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s6, v0
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v6
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, 1, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, v5, v7, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, 1, v6
+; CHECK-NEXT:    v_addc_u32_e32 v8, vcc, 0, v7, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v6, v1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v7, v8, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v4
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %result = sdiv i64 %num, 1235195 ; odd divisor 1235195 (0x12d8fb, the s_mov_b32 constant above): exercises the full reciprocal-based i64 sdiv expansion rather than a power-of-two path
+  ret i64 %result
+}
+
+define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
+; GISEL-LABEL: v_sdiv_v2i64_oddk_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s10, 0x12d8fb
+; GISEL-NEXT:    s_add_u32 s4, s10, 0
+; GISEL-NEXT:    s_cselect_b32 s5, 1, 0
+; GISEL-NEXT:    s_and_b32 s5, s5, 1
+; GISEL-NEXT:    s_cmp_lg_u32 s5, 0
+; GISEL-NEXT:    s_mov_b32 s6, 0
+; GISEL-NEXT:    s_mov_b32 s7, s6
+; GISEL-NEXT:    s_addc_u32 s5, 0, 0
+; GISEL-NEXT:    s_xor_b64 s[8:9], s[4:5], s[6:7]
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s8
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, s9
+; GISEL-NEXT:    s_sub_u32 s11, 0, s8
+; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
+; GISEL-NEXT:    s_and_b32 s4, s4, 1
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
+; GISEL-NEXT:    s_subb_u32 s12, 0, s9
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, s12, v4
+; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v5
+; GISEL-NEXT:    v_mul_hi_u32 v10, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, s11, v4
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v6
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT:    v_mul_lo_u32 v8, v5, v9
+; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v7
+; GISEL-NEXT:    v_mul_hi_u32 v11, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v7
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; GISEL-NEXT:    v_mul_hi_u32 v10, v4, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v5, v7
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], v5, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, s12, v4
+; GISEL-NEXT:    v_mul_lo_u32 v10, s11, v8
+; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, s11, v4
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v7
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v11
+; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v11
+; GISEL-NEXT:    v_mul_hi_u32 v11, v8, v11
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v10, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v9
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v12, v7
+; GISEL-NEXT:    v_mul_hi_u32 v12, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v9
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v10, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v10
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, v1, v4
+; GISEL-NEXT:    v_mul_lo_u32 v8, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v9, v0, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v5
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; GISEL-NEXT:    v_mul_hi_u32 v8, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT:    v_mul_lo_u32 v7, s9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v8, s8, v5
+; GISEL-NEXT:    v_mul_hi_u32 v10, s8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v4
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v9
+; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], v1, v7, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v7
+; GISEL-NEXT:    v_mov_b32_e32 v7, s9
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s9, v8
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v7, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s8, v0
+; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, s8, v0
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s9, v8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v9, v10, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s9, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
+; GISEL-NEXT:    s_add_u32 s4, s10, 0
+; GISEL-NEXT:    s_cselect_b32 s5, 1, 0
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s9, v1
+; GISEL-NEXT:    s_and_b32 s5, s5, 1
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v7
+; GISEL-NEXT:    s_cmp_lg_u32 s5, 0
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v9, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    s_addc_u32 s5, 0, 0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v7, v1, vcc
+; GISEL-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v10, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, s7
+; GISEL-NEXT:    s_sub_u32 s8, 0, s6
+; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
+; GISEL-NEXT:    s_and_b32 s4, s4, 1
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
+; GISEL-NEXT:    s_subb_u32 s9, 0, s7
+; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, s9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v5
+; GISEL-NEXT:    v_mul_hi_u32 v11, s8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v10, s8, v4
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v6
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; GISEL-NEXT:    v_mul_lo_u32 v9, v5, v10
+; GISEL-NEXT:    v_mul_lo_u32 v11, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v12, v4, v10
+; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v10
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v7
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v8
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_mul_hi_u32 v11, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v5, v8
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], v5, v8, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v10, s9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, s8, v9
+; GISEL-NEXT:    v_mul_hi_u32 v13, s8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, s8, v4
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v8
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
+; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v13, v4, v10
+; GISEL-NEXT:    v_mul_hi_u32 v8, v4, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v10
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v13, v8
+; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v10
+; GISEL-NEXT:    v_mul_hi_u32 v9, v9, v10
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v11
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v9, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v5
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v6, v2, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v5
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
+; GISEL-NEXT:    v_mul_hi_u32 v9, v2, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT:    v_mul_lo_u32 v6, s7, v4
+; GISEL-NEXT:    v_mul_lo_u32 v8, s6, v5
+; GISEL-NEXT:    v_mul_hi_u32 v10, s6, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, s6, v4
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
+; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], v3, v6, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v6
+; GISEL-NEXT:    v_mov_b32_e32 v6, s7
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v8
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v2
+; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, s6, v2
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s7, v8
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v9, v10, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s7, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s6, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s7, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v6
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v9, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v6, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v10, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v7
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v7
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_sdiv_v2i64_oddk_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_mov_b32 s6, 0x12d8fb
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, s6
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, 0
+; CGP-NEXT:    s_mov_b32 s7, 0xffed2705
+; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; CGP-NEXT:    v_mov_b32_e32 v7, v4
+; CGP-NEXT:    v_mac_f32_e32 v7, 0x4f800000, v5
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v6, vcc
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v6
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x5f7ffffc, v7
+; CGP-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v7
+; CGP-NEXT:    v_trunc_f32_e32 v8, v8
+; CGP-NEXT:    v_mac_f32_e32 v7, 0xcf800000, v8
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
+; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; CGP-NEXT:    v_mul_lo_u32 v9, -1, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, s7, v8
+; CGP-NEXT:    v_mul_hi_u32 v12, s7, v7
+; CGP-NEXT:    v_mul_lo_u32 v11, s7, v7
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; CGP-NEXT:    v_mul_lo_u32 v10, v8, v11
+; CGP-NEXT:    v_mul_lo_u32 v12, v7, v9
+; CGP-NEXT:    v_mul_hi_u32 v13, v7, v11
+; CGP-NEXT:    v_mul_hi_u32 v11, v8, v11
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v13, v8, v9
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; CGP-NEXT:    v_mul_hi_u32 v12, v7, v9
+; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v8, v9, vcc
+; CGP-NEXT:    v_mul_lo_u32 v11, -1, v7
+; CGP-NEXT:    v_mul_lo_u32 v12, s7, v10
+; CGP-NEXT:    v_mul_hi_u32 v14, s7, v7
+; CGP-NEXT:    v_mul_lo_u32 v13, s7, v7
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; CGP-NEXT:    v_mul_lo_u32 v12, v10, v13
+; CGP-NEXT:    v_mul_lo_u32 v14, v7, v11
+; CGP-NEXT:    v_mul_hi_u32 v9, v7, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v10, v13
+; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v3
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v12, v10, v11
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v14, v9
+; CGP-NEXT:    v_mul_hi_u32 v14, v7, v11
+; CGP-NEXT:    v_mul_hi_u32 v10, v10, v11
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v14
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v12
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v8, v10, vcc
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, 0, v8, vcc
+; CGP-NEXT:    v_mul_lo_u32 v9, v1, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, v0, v8
+; CGP-NEXT:    v_mul_hi_u32 v11, v0, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v11, v1, v8
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT:    v_mul_hi_u32 v10, v0, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v1, v8
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, s6, v8
+; CGP-NEXT:    v_mul_hi_u32 v12, s6, v7
+; CGP-NEXT:    v_mul_lo_u32 v11, s6, v7
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v11
+; CGP-NEXT:    v_subb_u32_e64 v10, s[4:5], v1, v9, vcc
+; CGP-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v9
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v10
+; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
+; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s6, v0
+; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v10
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v11, vcc, 0, v8, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v12, v0, vcc
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, 1, v10
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, 0, v11, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v10, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v11, v12, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v7, v0, vcc
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v4
+; CGP-NEXT:    v_trunc_f32_e32 v7, v7
+; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CGP-NEXT:    v_mul_lo_u32 v8, -1, v4
+; CGP-NEXT:    v_mul_lo_u32 v9, s7, v7
+; CGP-NEXT:    v_mul_hi_u32 v11, s7, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, s7, v4
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT:    v_mul_lo_u32 v9, v7, v10
+; CGP-NEXT:    v_mul_lo_u32 v11, v4, v8
+; CGP-NEXT:    v_mul_hi_u32 v12, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v7, v10
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v6
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v12, v7, v8
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT:    v_mul_hi_u32 v11, v4, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v7, v8
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; CGP-NEXT:    v_addc_u32_e64 v9, s[4:5], v7, v8, vcc
+; CGP-NEXT:    v_mul_lo_u32 v10, -1, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, s7, v9
+; CGP-NEXT:    v_mul_hi_u32 v13, s7, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, s7, v4
+; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
+; CGP-NEXT:    v_mul_lo_u32 v11, v9, v12
+; CGP-NEXT:    v_mul_lo_u32 v13, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v8, v4, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v9, v12
+; CGP-NEXT:    v_xor_b32_e32 v2, v2, v5
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v11, v9, v10
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v13, v8
+; CGP-NEXT:    v_mul_hi_u32 v13, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v9, v9, v10
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v11
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, v7, v9, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v5
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; CGP-NEXT:    v_mul_lo_u32 v8, v3, v4
+; CGP-NEXT:    v_mul_lo_u32 v9, v2, v7
+; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; CGP-NEXT:    v_mul_hi_u32 v6, v2, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v8, v3, v7
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
+; CGP-NEXT:    v_mul_hi_u32 v9, v2, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, v3, v7
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CGP-NEXT:    v_mul_lo_u32 v7, 0, v4
+; CGP-NEXT:    v_mul_lo_u32 v8, s6, v6
+; CGP-NEXT:    v_mul_hi_u32 v10, s6, v4
+; CGP-NEXT:    v_mul_lo_u32 v9, s6, v4
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
+; CGP-NEXT:    v_subb_u32_e64 v8, s[4:5], v3, v7, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v7
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v8
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v2
+; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, s6, v2
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v7, v7, v9, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v9, vcc, 0, v6, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v2
+; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v8
+; CGP-NEXT:    v_addc_u32_e32 v10, vcc, 0, v9, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v8, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v9, v10, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; CGP-NEXT:    v_xor_b32_e32 v2, v2, v5
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v5
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v5, vcc
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %result = sdiv <2 x i64> %num, <i64 1235195, i64 1235195>
+  ret <2 x i64> %result
+}
+
+; Checks legalization of 64-bit sdiv by a variable-shifted power of two
+; (4096 << %y), i.e. a denominator that is not a compile-time constant.
+; NOTE(review): the CHECK lines below appear machine-generated (LLVM
+; update-test-checks style) and must match codegen output exactly —
+; regenerate them with the update script rather than editing by hand.
+define i64 @v_sdiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
+; CHECK-LABEL: v_sdiv_i64_pow2_shl_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_movk_i32 s4, 0x1000
+; CHECK-NEXT:    s_mov_b32 s5, 0
+; CHECK-NEXT:    v_lshl_b64 v[4:5], s[4:5], v2
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    v_or_b32_e32 v3, v1, v5
+; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CHECK-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CHECK-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CHECK-NEXT:    s_cbranch_execz BB7_2
+; CHECK-NEXT:  ; %bb.1:
+; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v2
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v5, v2, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v2
+; CHECK-NEXT:    v_xor_b32_e32 v5, v5, v2
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v6, v3
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v7, v5
+; CHECK-NEXT:    v_ashrrev_i32_e32 v8, 31, v1
+; CHECK-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v7
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v0, v8
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v8, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v10, vcc, 0, v3
+; CHECK-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v6
+; CHECK-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v6
+; CHECK-NEXT:    v_trunc_f32_e32 v9, v9
+; CHECK-NEXT:    v_mac_f32_e32 v6, 0xcf800000, v9
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v9, v9
+; CHECK-NEXT:    v_subb_u32_e32 v11, vcc, 0, v5, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v7, v7, v8
+; CHECK-NEXT:    v_mul_lo_u32 v12, v11, v6
+; CHECK-NEXT:    v_mul_lo_u32 v13, v10, v9
+; CHECK-NEXT:    v_mul_hi_u32 v15, v10, v6
+; CHECK-NEXT:    v_mul_lo_u32 v14, v10, v6
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v8
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; CHECK-NEXT:    v_mul_lo_u32 v13, v9, v14
+; CHECK-NEXT:    v_mul_lo_u32 v15, v6, v12
+; CHECK-NEXT:    v_mul_hi_u32 v16, v6, v14
+; CHECK-NEXT:    v_mul_hi_u32 v14, v9, v14
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; CHECK-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v16, v9, v12
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
+; CHECK-NEXT:    v_mul_hi_u32 v15, v6, v12
+; CHECK-NEXT:    v_mul_hi_u32 v12, v9, v12
+; CHECK-NEXT:    v_add_i32_e32 v14, vcc, v16, v14
+; CHECK-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; CHECK-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v13
+; CHECK-NEXT:    v_addc_u32_e64 v13, s[4:5], v9, v12, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v11, v11, v6
+; CHECK-NEXT:    v_mul_lo_u32 v14, v10, v13
+; CHECK-NEXT:    v_mul_lo_u32 v15, v10, v6
+; CHECK-NEXT:    v_mul_hi_u32 v10, v10, v6
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
+; CHECK-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; CHECK-NEXT:    v_mul_hi_u32 v12, v6, v15
+; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
+; CHECK-NEXT:    v_mul_lo_u32 v11, v13, v15
+; CHECK-NEXT:    v_mul_lo_u32 v14, v6, v10
+; CHECK-NEXT:    v_mul_hi_u32 v15, v13, v15
+; CHECK-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; CHECK-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CHECK-NEXT:    v_mul_lo_u32 v12, v13, v10
+; CHECK-NEXT:    v_add_i32_e64 v11, s[4:5], v14, v11
+; CHECK-NEXT:    v_mul_hi_u32 v14, v6, v10
+; CHECK-NEXT:    v_mul_hi_u32 v10, v13, v10
+; CHECK-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v15
+; CHECK-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; CHECK-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v14, s[4:5], v15, v14
+; CHECK-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v12
+; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v12
+; CHECK-NEXT:    v_addc_u32_e32 v9, vcc, v9, v10, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
+; CHECK-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v10, v1, v6
+; CHECK-NEXT:    v_mul_lo_u32 v11, v7, v9
+; CHECK-NEXT:    v_mul_hi_u32 v12, v7, v6
+; CHECK-NEXT:    v_mul_hi_u32 v6, v1, v6
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v12, v1, v9
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_mul_hi_u32 v11, v7, v9
+; CHECK-NEXT:    v_mul_hi_u32 v9, v1, v9
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CHECK-NEXT:    v_mul_lo_u32 v10, v5, v6
+; CHECK-NEXT:    v_mul_lo_u32 v11, v3, v9
+; CHECK-NEXT:    v_mul_hi_u32 v13, v3, v6
+; CHECK-NEXT:    v_mul_lo_u32 v12, v3, v6
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
+; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v7, v12
+; CHECK-NEXT:    v_subb_u32_e64 v11, s[4:5], v1, v10, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v10
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v5
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v3
+; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v7, v3
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v11, v5
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, 1, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, v10, v12, s[4:5]
+; CHECK-NEXT:    v_addc_u32_e32 v12, vcc, 0, v9, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v13, v3, vcc
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v11
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, 0, v12, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v11, v3, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v12, v5, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v5, v8, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v5
+; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v5
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v1, v5
+; CHECK-NEXT:    v_subb_u32_e32 v3, vcc, v3, v5, vcc
+; CHECK-NEXT:  BB7_2: ; %Flow
+; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
+; CHECK-NEXT:    s_xor_b64 exec, exec, s[6:7]
+; CHECK-NEXT:    s_cbranch_execz BB7_4
+; CHECK-NEXT:  ; %bb.3:
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v4
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v1
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_lo_u32 v2, v1, v4
+; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v4
+; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v2, v2, v1
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v1, v2
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT:    v_mul_lo_u32 v2, v1, v4
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
+; CHECK-NEXT:    v_subrev_i32_e32 v5, vcc, 1, v1
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v4
+; CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v1, v3, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v5, v0, vcc
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0
+; CHECK-NEXT:  BB7_4:
+; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
+; CHECK-NEXT:    v_mov_b32_e32 v0, v2
+; CHECK-NEXT:    v_mov_b32_e32 v1, v3
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %shl.y = shl i64 4096, %y  ; denominator = 4096 << %y (runtime value)
+  %r = sdiv i64 %x, %shl.y
+  ret i64 %r
+}
+
+define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
+; GISEL-LABEL: v_sdiv_v2i64_pow2_shl_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_movk_i32 s6, 0x1000
+; GISEL-NEXT:    s_mov_b32 s7, 0
+; GISEL-NEXT:    v_lshl_b64 v[4:5], s[6:7], v4
+; GISEL-NEXT:    v_ashrrev_i32_e32 v10, 31, v1
+; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v7, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v4, v4, v7
+; GISEL-NEXT:    v_xor_b32_e32 v5, v5, v7
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v4
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v5
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v10, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, 0, v4
+; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v9
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
+; GISEL-NEXT:    v_xor_b32_e32 v9, v0, v10
+; GISEL-NEXT:    v_subb_u32_e32 v12, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v17, v1, v10
+; GISEL-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v8
+; GISEL-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v0
+; GISEL-NEXT:    v_trunc_f32_e32 v8, v8
+; GISEL-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v8
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; GISEL-NEXT:    v_mul_lo_u32 v13, v12, v0
+; GISEL-NEXT:    v_mul_lo_u32 v14, v11, v8
+; GISEL-NEXT:    v_mul_hi_u32 v16, v11, v0
+; GISEL-NEXT:    v_mul_lo_u32 v15, v11, v0
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
+; GISEL-NEXT:    v_mul_lo_u32 v14, v8, v15
+; GISEL-NEXT:    v_mul_lo_u32 v16, v0, v13
+; GISEL-NEXT:    v_mul_hi_u32 v1, v0, v15
+; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v15
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v14, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v14, v8, v13
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v16, v1
+; GISEL-NEXT:    v_mul_hi_u32 v16, v0, v13
+; GISEL-NEXT:    v_mul_hi_u32 v13, v8, v13
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v14, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], v8, v13, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v12, v12, v0
+; GISEL-NEXT:    v_mul_lo_u32 v14, v11, v1
+; GISEL-NEXT:    v_mul_lo_u32 v15, v11, v0
+; GISEL-NEXT:    v_mul_hi_u32 v11, v11, v0
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v13
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; GISEL-NEXT:    v_mul_hi_u32 v13, v0, v15
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; GISEL-NEXT:    v_mul_lo_u32 v12, v1, v15
+; GISEL-NEXT:    v_mul_lo_u32 v14, v0, v11
+; GISEL-NEXT:    v_mul_hi_u32 v15, v1, v15
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v13, v1, v11
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v12
+; GISEL-NEXT:    v_mul_hi_u32 v14, v0, v11
+; GISEL-NEXT:    v_mul_hi_u32 v1, v1, v11
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v15, v14
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v14, v13
+; GISEL-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v11
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v8, v1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v0, v12
+; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v12, v17, v8
+; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v11
+; GISEL-NEXT:    v_lshl_b64 v[0:1], s[6:7], v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v9, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v17, v8
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v12, v17, v11
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v13, v6
+; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v11
+; GISEL-NEXT:    v_mul_hi_u32 v11, v17, v11
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
+; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v6
+; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v6
+; GISEL-NEXT:    v_mul_lo_u32 v13, v4, v6
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v14
+; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v9, v13
+; GISEL-NEXT:    v_subb_u32_e64 v12, s[4:5], v17, v11, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v11, s[4:5], v17, v11
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v12, v5
+; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, v11, v5, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v4
+; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v9, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v12, v5
+; GISEL-NEXT:    v_subbrev_u32_e32 v11, vcc, 0, v11, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, v13, v14, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v6
+; GISEL-NEXT:    v_addc_u32_e32 v14, vcc, 0, v8, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v11, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v15, v4, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v13
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, 0, v14, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v13, v5, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v14, v9, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v6, v10, v7
+; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v1
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v8, v5, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v7, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v8, v0, v7
+; GISEL-NEXT:    v_xor_b32_e32 v9, v1, v7
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, v8
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v1, v9
+; GISEL-NEXT:    v_ashrrev_i32_e32 v10, 31, v3
+; GISEL-NEXT:    v_xor_b32_e32 v4, v4, v6
+; GISEL-NEXT:    v_xor_b32_e32 v5, v5, v6
+; GISEL-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v2, v10
+; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v3, v10, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v3, v1, v10
+; GISEL-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
+; GISEL-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
+; GISEL-NEXT:    v_trunc_f32_e32 v1, v1
+; GISEL-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, 0, v8
+; GISEL-NEXT:    v_subb_u32_e32 v12, vcc, 0, v9, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v13, v12, v0
+; GISEL-NEXT:    v_mul_lo_u32 v14, v11, v1
+; GISEL-NEXT:    v_mul_hi_u32 v16, v11, v0
+; GISEL-NEXT:    v_mul_lo_u32 v15, v11, v0
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v10
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
+; GISEL-NEXT:    v_mul_lo_u32 v14, v1, v15
+; GISEL-NEXT:    v_mul_lo_u32 v16, v0, v13
+; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v15
+; GISEL-NEXT:    v_mul_hi_u32 v15, v1, v15
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v17, v1, v13
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v16, v14
+; GISEL-NEXT:    v_mul_hi_u32 v16, v0, v13
+; GISEL-NEXT:    v_mul_hi_u32 v13, v1, v13
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v17, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v17, v16
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[4:5], v1, v13, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v12, v12, v0
+; GISEL-NEXT:    v_mul_lo_u32 v15, v11, v14
+; GISEL-NEXT:    v_mul_lo_u32 v16, v11, v0
+; GISEL-NEXT:    v_mul_hi_u32 v11, v11, v0
+; GISEL-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v13
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v15
+; GISEL-NEXT:    v_mul_hi_u32 v13, v0, v16
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; GISEL-NEXT:    v_mul_lo_u32 v12, v14, v16
+; GISEL-NEXT:    v_mul_lo_u32 v15, v0, v11
+; GISEL-NEXT:    v_mul_hi_u32 v16, v14, v16
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v13, v14, v11
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v15, v12
+; GISEL-NEXT:    v_mul_hi_u32 v15, v0, v11
+; GISEL-NEXT:    v_mul_hi_u32 v11, v14, v11
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v16, v15
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v15, v13
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v11, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v0, v12
+; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v13, v2, v11
+; GISEL-NEXT:    v_mul_lo_u32 v14, v3, v12
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v4, v6
+; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v11
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v5, v6, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v13, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v5, v2, v12
+; GISEL-NEXT:    v_mul_hi_u32 v11, v2, v11
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v12
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v11, v6
+; GISEL-NEXT:    v_mul_hi_u32 v11, v2, v12
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v11, v5
+; GISEL-NEXT:    v_mul_lo_u32 v6, v9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, v8, v5
+; GISEL-NEXT:    v_mul_hi_u32 v13, v8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v4
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v13
+; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v3, v12
+; GISEL-NEXT:    v_subb_u32_e64 v11, s[4:5], v2, v6, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v6
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v9
+; GISEL-NEXT:    v_subb_u32_e32 v2, vcc, v2, v9, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v8
+; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v3, v8
+; GISEL-NEXT:    v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v11, v9
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, v6, v12, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v13, v3, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v11
+; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, 0, v12, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v11, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v12, v8, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v4, v10, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v4
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v4
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v4, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_sdiv_v2i64_pow2_shl_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_movk_i32 s4, 0x1000
+; CGP-NEXT:    s_mov_b32 s5, 0
+; CGP-NEXT:    v_lshl_b64 v[10:11], s[4:5], v4
+; CGP-NEXT:    v_mov_b32_e32 v7, v1
+; CGP-NEXT:    v_mov_b32_e32 v5, v0
+; CGP-NEXT:    v_or_b32_e32 v1, v7, v11
+; CGP-NEXT:    v_mov_b32_e32 v0, 0
+; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; CGP-NEXT:    v_lshl_b64 v[8:9], s[4:5], v6
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execz BB8_2
+; CGP-NEXT:  ; %bb.1:
+; CGP-NEXT:    v_ashrrev_i32_e32 v0, 31, v11
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v0
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v11, v0, vcc
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v0
+; CGP-NEXT:    v_xor_b32_e32 v4, v4, v0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v1
+; CGP-NEXT:    v_cvt_f32_u32_e32 v11, v4
+; CGP-NEXT:    v_ashrrev_i32_e32 v12, 31, v7
+; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v11
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v5, v12
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, v7, v12, vcc
+; CGP-NEXT:    v_sub_i32_e32 v14, vcc, 0, v1
+; CGP-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v6
+; CGP-NEXT:    v_mul_f32_e32 v13, 0x2f800000, v6
+; CGP-NEXT:    v_trunc_f32_e32 v13, v13
+; CGP-NEXT:    v_mac_f32_e32 v6, 0xcf800000, v13
+; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT:    v_cvt_u32_f32_e32 v13, v13
+; CGP-NEXT:    v_subb_u32_e32 v15, vcc, 0, v4, vcc
+; CGP-NEXT:    v_xor_b32_e32 v11, v11, v12
+; CGP-NEXT:    v_mul_lo_u32 v16, v15, v6
+; CGP-NEXT:    v_mul_lo_u32 v17, v14, v13
+; CGP-NEXT:    v_mul_hi_u32 v19, v14, v6
+; CGP-NEXT:    v_mul_lo_u32 v18, v14, v6
+; CGP-NEXT:    v_xor_b32_e32 v7, v7, v12
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v16, v17
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v16, v19
+; CGP-NEXT:    v_mul_lo_u32 v17, v13, v18
+; CGP-NEXT:    v_mul_lo_u32 v19, v6, v16
+; CGP-NEXT:    v_mul_hi_u32 v20, v6, v18
+; CGP-NEXT:    v_mul_hi_u32 v18, v13, v18
+; CGP-NEXT:    v_add_i32_e32 v17, vcc, v17, v19
+; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v17, vcc, v17, v20
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v20, v13, v16
+; CGP-NEXT:    v_add_i32_e32 v17, vcc, v19, v17
+; CGP-NEXT:    v_mul_hi_u32 v19, v6, v16
+; CGP-NEXT:    v_mul_hi_u32 v16, v13, v16
+; CGP-NEXT:    v_add_i32_e32 v18, vcc, v20, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v18, vcc, v18, v19
+; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v19, vcc, v20, v19
+; CGP-NEXT:    v_add_i32_e32 v17, vcc, v18, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v18, vcc, v19, v18
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v16, v18
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v17
+; CGP-NEXT:    v_addc_u32_e64 v17, s[4:5], v13, v16, vcc
+; CGP-NEXT:    v_mul_lo_u32 v15, v15, v6
+; CGP-NEXT:    v_mul_lo_u32 v18, v14, v17
+; CGP-NEXT:    v_mul_lo_u32 v19, v14, v6
+; CGP-NEXT:    v_mul_hi_u32 v14, v14, v6
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v16
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v18
+; CGP-NEXT:    v_mul_hi_u32 v16, v6, v19
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v15, v14
+; CGP-NEXT:    v_mul_lo_u32 v15, v17, v19
+; CGP-NEXT:    v_mul_lo_u32 v18, v6, v14
+; CGP-NEXT:    v_mul_hi_u32 v19, v17, v19
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v16, v17, v14
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v18, v15
+; CGP-NEXT:    v_mul_hi_u32 v18, v6, v14
+; CGP-NEXT:    v_mul_hi_u32 v14, v17, v14
+; CGP-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v19
+; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v18, s[4:5], v19, v18
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v16, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v16, s[4:5], v18, v16
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v16
+; CGP-NEXT:    v_addc_u32_e32 v13, vcc, v13, v14, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v15
+; CGP-NEXT:    v_addc_u32_e32 v13, vcc, 0, v13, vcc
+; CGP-NEXT:    v_mul_lo_u32 v14, v7, v6
+; CGP-NEXT:    v_mul_lo_u32 v15, v11, v13
+; CGP-NEXT:    v_mul_hi_u32 v16, v11, v6
+; CGP-NEXT:    v_mul_hi_u32 v6, v7, v6
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v16, v7, v13
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CGP-NEXT:    v_mul_hi_u32 v15, v11, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v7, v13
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v16, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; CGP-NEXT:    v_mul_lo_u32 v14, v4, v6
+; CGP-NEXT:    v_mul_lo_u32 v15, v1, v13
+; CGP-NEXT:    v_mul_hi_u32 v17, v1, v6
+; CGP-NEXT:    v_mul_lo_u32 v16, v1, v6
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; CGP-NEXT:    v_sub_i32_e32 v11, vcc, v11, v16
+; CGP-NEXT:    v_subb_u32_e64 v15, s[4:5], v7, v14, vcc
+; CGP-NEXT:    v_sub_i32_e64 v7, s[4:5], v7, v14
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v15, v4
+; CGP-NEXT:    v_subb_u32_e32 v7, vcc, v7, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v1
+; CGP-NEXT:    v_sub_i32_e32 v11, vcc, v11, v1
+; CGP-NEXT:    v_subbrev_u32_e32 v7, vcc, 0, v7, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v15, v4
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, 1, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v14, v14, v16, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v16, vcc, 0, v13, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, -1, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v1
+; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v7, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v17, v1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v15
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, 0, v16, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v15, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v16, v7, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v14
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
+; CGP-NEXT:    v_xor_b32_e32 v6, v12, v0
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v13, v4, vcc
+; CGP-NEXT:    v_xor_b32_e32 v0, v1, v6
+; CGP-NEXT:    v_xor_b32_e32 v1, v4, v6
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; CGP-NEXT:  BB8_2: ; %Flow2
+; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
+; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execz BB8_4
+; CGP-NEXT:  ; %bb.3:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v10
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
+; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_lo_u32 v1, v0, v10
+; CGP-NEXT:    v_mul_hi_u32 v4, v0, v10
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, 0, v1
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; CGP-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v0, v1
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; CGP-NEXT:    v_mul_hi_u32 v0, v0, v5
+; CGP-NEXT:    v_mul_lo_u32 v1, v0, v10
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
+; CGP-NEXT:    v_subrev_i32_e32 v6, vcc, 1, v0
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v5, v1
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v1
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v10
+; CGP-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; CGP-NEXT:    v_mov_b32_e32 v1, 0
+; CGP-NEXT:  BB8_4:
+; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
+; CGP-NEXT:    v_or_b32_e32 v5, v3, v9
+; CGP-NEXT:    v_mov_b32_e32 v4, 0
+; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execz BB8_6
+; CGP-NEXT:  ; %bb.5:
+; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v9
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v4
+; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v9, v4, vcc
+; CGP-NEXT:    v_xor_b32_e32 v5, v5, v4
+; CGP-NEXT:    v_xor_b32_e32 v6, v6, v4
+; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v5
+; CGP-NEXT:    v_cvt_f32_u32_e32 v9, v6
+; CGP-NEXT:    v_ashrrev_i32_e32 v10, 31, v3
+; CGP-NEXT:    v_mac_f32_e32 v7, 0x4f800000, v9
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v2, v10
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v10, vcc
+; CGP-NEXT:    v_sub_i32_e32 v12, vcc, 0, v5
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x5f7ffffc, v7
+; CGP-NEXT:    v_mul_f32_e32 v11, 0x2f800000, v7
+; CGP-NEXT:    v_trunc_f32_e32 v11, v11
+; CGP-NEXT:    v_mac_f32_e32 v7, 0xcf800000, v11
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v11, v11
+; CGP-NEXT:    v_subb_u32_e32 v13, vcc, 0, v6, vcc
+; CGP-NEXT:    v_xor_b32_e32 v9, v9, v10
+; CGP-NEXT:    v_mul_lo_u32 v14, v13, v7
+; CGP-NEXT:    v_mul_lo_u32 v15, v12, v11
+; CGP-NEXT:    v_mul_hi_u32 v17, v12, v7
+; CGP-NEXT:    v_mul_lo_u32 v16, v12, v7
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v10
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; CGP-NEXT:    v_mul_lo_u32 v15, v11, v16
+; CGP-NEXT:    v_mul_lo_u32 v17, v7, v14
+; CGP-NEXT:    v_mul_hi_u32 v18, v7, v16
+; CGP-NEXT:    v_mul_hi_u32 v16, v11, v16
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v15, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v15, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v18, v11, v14
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v17, v15
+; CGP-NEXT:    v_mul_hi_u32 v17, v7, v14
+; CGP-NEXT:    v_mul_hi_u32 v14, v11, v14
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v18, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v16, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v17, vcc, v18, v17
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v17, v16
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v15
+; CGP-NEXT:    v_addc_u32_e64 v15, s[4:5], v11, v14, vcc
+; CGP-NEXT:    v_mul_lo_u32 v13, v13, v7
+; CGP-NEXT:    v_mul_lo_u32 v16, v12, v15
+; CGP-NEXT:    v_mul_lo_u32 v17, v12, v7
+; CGP-NEXT:    v_mul_hi_u32 v12, v12, v7
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v16
+; CGP-NEXT:    v_mul_hi_u32 v14, v7, v17
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
+; CGP-NEXT:    v_mul_lo_u32 v13, v15, v17
+; CGP-NEXT:    v_mul_lo_u32 v16, v7, v12
+; CGP-NEXT:    v_mul_hi_u32 v17, v15, v17
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v14, v15, v12
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v16, v13
+; CGP-NEXT:    v_mul_hi_u32 v16, v7, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v15, v12
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v16, s[4:5], v17, v16
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v16, v14
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; CGP-NEXT:    v_addc_u32_e32 v11, vcc, v11, v12, vcc
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
+; CGP-NEXT:    v_addc_u32_e32 v11, vcc, 0, v11, vcc
+; CGP-NEXT:    v_mul_lo_u32 v12, v3, v7
+; CGP-NEXT:    v_mul_lo_u32 v13, v9, v11
+; CGP-NEXT:    v_mul_hi_u32 v14, v9, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, v3, v7
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v14, v3, v11
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_mul_hi_u32 v13, v9, v11
+; CGP-NEXT:    v_mul_hi_u32 v11, v3, v11
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v14, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; CGP-NEXT:    v_mul_lo_u32 v12, v6, v7
+; CGP-NEXT:    v_mul_lo_u32 v13, v5, v11
+; CGP-NEXT:    v_mul_hi_u32 v15, v5, v7
+; CGP-NEXT:    v_mul_lo_u32 v14, v5, v7
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; CGP-NEXT:    v_sub_i32_e32 v9, vcc, v9, v14
+; CGP-NEXT:    v_subb_u32_e64 v13, s[4:5], v3, v12, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v12
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v13, v6
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v6, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v5
+; CGP-NEXT:    v_sub_i32_e32 v9, vcc, v9, v5
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v13, v6
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, 1, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v12, v12, v14, s[4:5]
+; CGP-NEXT:    v_addc_u32_e32 v14, vcc, 0, v11, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v6
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v15, v5, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v13
+; CGP-NEXT:    v_addc_u32_e32 v6, vcc, 0, v14, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v13, v5, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v14, v6, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
+; CGP-NEXT:    v_xor_b32_e32 v6, v10, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v11, v5, vcc
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v6
+; CGP-NEXT:    v_xor_b32_e32 v5, v5, v6
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v3, v6
+; CGP-NEXT:    v_subb_u32_e32 v5, vcc, v5, v6, vcc
+; CGP-NEXT:  BB8_6: ; %Flow
+; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
+; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execz BB8_8
+; CGP-NEXT:  ; %bb.7:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v8
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_lo_u32 v4, v3, v8
+; CGP-NEXT:    v_mul_hi_u32 v5, v3, v8
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; CGP-NEXT:    v_mul_hi_u32 v4, v4, v3
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CGP-NEXT:    v_mul_hi_u32 v3, v3, v2
+; CGP-NEXT:    v_mul_lo_u32 v4, v3, v8
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
+; CGP-NEXT:    v_subrev_i32_e32 v6, vcc, 1, v3
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v2, v4
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v8
+; CGP-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v2, v3, v5, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; CGP-NEXT:    v_mov_b32_e32 v5, 0
+; CGP-NEXT:  BB8_8:
+; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
+; CGP-NEXT:    v_mov_b32_e32 v2, v4
+; CGP-NEXT:    v_mov_b32_e32 v3, v5
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
+  %r = sdiv <2 x i64> %x, %shl.y
+  ret <2 x i64> %r
+}
+
+define i64 @v_sdiv_i64_24bit(i64 %num, i64 %den) {
+; GISEL-LABEL: v_sdiv_i64_24bit:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
+; GISEL-NEXT:    v_and_b32_e32 v1, s4, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
+; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v2
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GISEL-NEXT:    v_mul_lo_u32 v3, v2, v1
+; GISEL-NEXT:    v_mul_hi_u32 v4, v2, v1
+; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v3
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v3, v3, v2
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v2, v3
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v2, v2, v0
+; GISEL-NEXT:    v_mul_lo_u32 v3, v2, v1
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
+; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, 1, v2
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v0, v3
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v1
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
+; GISEL-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v2, v4, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_sdiv_i64_24bit:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_mov_b32 s4, 0xffffff
+; CGP-NEXT:    v_and_b32_e32 v1, s4, v2
+; CGP-NEXT:    v_cvt_f32_i32_e32 v2, v1
+; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
+; CGP-NEXT:    v_cvt_f32_i32_e32 v3, v0
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v1
+; CGP-NEXT:    v_rcp_f32_e32 v4, v2
+; CGP-NEXT:    v_ashrrev_i32_e32 v0, 30, v0
+; CGP-NEXT:    v_or_b32_e32 v0, 1, v0
+; CGP-NEXT:    v_mul_f32_e32 v1, v3, v4
+; CGP-NEXT:    v_trunc_f32_e32 v1, v1
+; CGP-NEXT:    v_mad_f32 v3, -v1, v2, v3
+; CGP-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; CGP-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, |v2|
+; CGP-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
+; CGP-NEXT:    v_lshlrev_b32_e32 v0, 7, v0
+; CGP-NEXT:    v_ashrrev_i32_e32 v0, 7, v0
+; CGP-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %num.mask = and i64 %num, 16777215
+  %den.mask = and i64 %den, 16777215
+  %result = sdiv i64 %num.mask, %den.mask
+  ret i64 %result
+}
+
+define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
+; GISEL-LABEL: v_sdiv_v2i64_24bit:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s6, 0xffffff
+; GISEL-NEXT:    v_and_b32_e32 v1, s6, v4
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 0, v1
+; GISEL-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v1
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v3
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v1
+; GISEL-NEXT:    v_subb_u32_e32 v8, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_and_b32_e32 v0, s6, v0
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_and_b32_e32 v6, s6, v6
+; GISEL-NEXT:    v_and_b32_e32 v2, s6, v2
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_mul_lo_u32 v9, v8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v10, v7, v5
+; GISEL-NEXT:    v_mul_hi_u32 v12, v7, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, v7, v4
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v11
+; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v11
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, 0, v0
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v14, v5, v9
+; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v11
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; GISEL-NEXT:    v_mul_hi_u32 v12, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v14, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v5, v9, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, v8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, v7, v10
+; GISEL-NEXT:    v_mul_lo_u32 v12, v7, v4
+; GISEL-NEXT:    v_mul_hi_u32 v7, v7, v4
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v9
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; GISEL-NEXT:    v_mul_hi_u32 v9, v4, v12
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
+; GISEL-NEXT:    v_mul_lo_u32 v8, v10, v12
+; GISEL-NEXT:    v_mul_lo_u32 v11, v4, v7
+; GISEL-NEXT:    v_mul_hi_u32 v12, v10, v12
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v7
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; GISEL-NEXT:    v_mul_hi_u32 v11, v4, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v10, v7
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v9
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v7, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, v13, v4
+; GISEL-NEXT:    v_mul_lo_u32 v8, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v9, v0, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v13, v4
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v13, v5
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; GISEL-NEXT:    v_mul_hi_u32 v8, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v13, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT:    v_mul_lo_u32 v7, v3, v4
+; GISEL-NEXT:    v_mul_lo_u32 v8, v1, v5
+; GISEL-NEXT:    v_mul_hi_u32 v10, v1, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v4
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v9
+; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], v13, v7, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v7, s[4:5], v13, v7
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v3
+; GISEL-NEXT:    v_subb_u32_e32 v7, vcc, v7, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v1
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v8, v3
+; GISEL-NEXT:    v_subbrev_u32_e32 v7, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v9, v10, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v7, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v11, v0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v9
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v10, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 0, v6
+; GISEL-NEXT:    v_addc_u32_e64 v7, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v11, v6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v12, v7
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v9, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v10, v3, vcc
+; GISEL-NEXT:    v_mac_f32_e32 v11, 0x4f800000, v12
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v11
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GISEL-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v3
+; GISEL-NEXT:    v_trunc_f32_e32 v4, v4
+; GISEL-NEXT:    v_mac_f32_e32 v3, 0xcf800000, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v6
+; GISEL-NEXT:    v_subb_u32_e32 v8, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v8, v3
+; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v4
+; GISEL-NEXT:    v_mul_hi_u32 v12, v5, v3
+; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v3
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v11
+; GISEL-NEXT:    v_mul_lo_u32 v12, v3, v9
+; GISEL-NEXT:    v_mul_hi_u32 v14, v3, v11
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, 0, v2
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v11, v4, v11
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; GISEL-NEXT:    v_mul_hi_u32 v12, v3, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v4, v9
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v14, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v10
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v4, v9, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, v8, v3
+; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v10
+; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v3
+; GISEL-NEXT:    v_mul_hi_u32 v5, v5, v3
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v9
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; GISEL-NEXT:    v_mul_hi_u32 v9, v3, v12
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v8, v5
+; GISEL-NEXT:    v_mul_lo_u32 v8, v10, v12
+; GISEL-NEXT:    v_mul_lo_u32 v11, v3, v5
+; GISEL-NEXT:    v_mul_hi_u32 v12, v10, v12
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v5
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; GISEL-NEXT:    v_mul_hi_u32 v11, v3, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v10, v5
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v9
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, v4, v5, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v3
+; GISEL-NEXT:    v_mul_lo_u32 v8, v2, v4
+; GISEL-NEXT:    v_mul_hi_u32 v9, v2, v3
+; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0, v0
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v13, v4
+; GISEL-NEXT:    v_mul_hi_u32 v3, v13, v3
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
+; GISEL-NEXT:    v_mul_hi_u32 v8, v2, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v13, v4
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v9, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; GISEL-NEXT:    v_mul_lo_u32 v5, v7, v3
+; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v4
+; GISEL-NEXT:    v_mul_hi_u32 v10, v6, v3
+; GISEL-NEXT:    v_mul_lo_u32 v9, v6, v3
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
+; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], v13, v5, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v13, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v7
+; GISEL-NEXT:    v_subb_u32_e32 v5, vcc, v5, v7, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v8, v7
+; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v9, v10, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v3
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v4, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v11, v2, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v9
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v10, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v9, v5, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v10, v6, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
+; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 0, v2
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_sdiv_v2i64_24bit:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_mov_b32 s4, 0xffffff
+; CGP-NEXT:    v_and_b32_e32 v1, s4, v4
+; CGP-NEXT:    v_cvt_f32_i32_e32 v3, v1
+; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
+; CGP-NEXT:    v_and_b32_e32 v4, s4, v6
+; CGP-NEXT:    v_cvt_f32_i32_e32 v5, v0
+; CGP-NEXT:    v_rcp_f32_e32 v6, v3
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v1
+; CGP-NEXT:    v_and_b32_e32 v2, s4, v2
+; CGP-NEXT:    v_ashrrev_i32_e32 v0, 30, v0
+; CGP-NEXT:    v_mul_f32_e32 v1, v5, v6
+; CGP-NEXT:    v_trunc_f32_e32 v1, v1
+; CGP-NEXT:    v_mad_f32 v5, -v1, v3, v5
+; CGP-NEXT:    v_cmp_ge_f32_e64 vcc, |v5|, |v3|
+; CGP-NEXT:    v_cvt_f32_i32_e32 v3, v4
+; CGP-NEXT:    v_cvt_f32_i32_e32 v5, v2
+; CGP-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; CGP-NEXT:    v_xor_b32_e32 v2, v2, v4
+; CGP-NEXT:    v_rcp_f32_e32 v6, v3
+; CGP-NEXT:    v_or_b32_e32 v0, 1, v0
+; CGP-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
+; CGP-NEXT:    v_mul_f32_e32 v4, v5, v6
+; CGP-NEXT:    v_trunc_f32_e32 v4, v4
+; CGP-NEXT:    v_mad_f32 v5, -v4, v3, v5
+; CGP-NEXT:    v_cvt_i32_f32_e32 v4, v4
+; CGP-NEXT:    v_ashrrev_i32_e32 v2, 30, v2
+; CGP-NEXT:    v_or_b32_e32 v2, 1, v2
+; CGP-NEXT:    v_cmp_ge_f32_e64 vcc, |v5|, |v3|
+; CGP-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
+; CGP-NEXT:    v_lshlrev_b32_e32 v0, 7, v0
+; CGP-NEXT:    v_lshlrev_b32_e32 v2, 7, v2
+; CGP-NEXT:    v_ashrrev_i32_e32 v0, 7, v0
+; CGP-NEXT:    v_ashrrev_i32_e32 v2, 7, v2
+; CGP-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; CGP-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>
+  %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215>
+  %result = sdiv <2 x i64> %num.mask, %den.mask
+  ret <2 x i64> %result
+}

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
new file mode 100644
index 000000000000..e9a808dce646
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -0,0 +1,3749 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
+
+; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
+
+define i64 @v_srem_i64(i64 %num, i64 %den) {
+; CHECK-LABEL: v_srem_i64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_or_b32_e32 v5, v1, v3
+; CHECK-NEXT:    v_mov_b32_e32 v4, 0
+; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CHECK-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CHECK-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CHECK-NEXT:    s_cbranch_execz BB0_2
+; CHECK-NEXT:  ; %bb.1:
+; CHECK-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v2, v4
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v4, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v4
+; CHECK-NEXT:    v_xor_b32_e32 v5, v5, v4
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v5
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v6, v3
+; CHECK-NEXT:    v_ashrrev_i32_e32 v7, 31, v1
+; CHECK-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v6
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v0, v7
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v7, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v9, vcc, 0, v5
+; CHECK-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CHECK-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v4
+; CHECK-NEXT:    v_trunc_f32_e32 v8, v8
+; CHECK-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v8
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; CHECK-NEXT:    v_subb_u32_e32 v10, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v6, v6, v7
+; CHECK-NEXT:    v_mul_lo_u32 v11, v10, v4
+; CHECK-NEXT:    v_mul_lo_u32 v12, v9, v8
+; CHECK-NEXT:    v_mul_hi_u32 v14, v9, v4
+; CHECK-NEXT:    v_mul_lo_u32 v13, v9, v4
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v7
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v11, v14
+; CHECK-NEXT:    v_mul_lo_u32 v12, v8, v13
+; CHECK-NEXT:    v_mul_lo_u32 v14, v4, v11
+; CHECK-NEXT:    v_mul_hi_u32 v15, v4, v13
+; CHECK-NEXT:    v_mul_hi_u32 v13, v8, v13
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; CHECK-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v15, v8, v11
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
+; CHECK-NEXT:    v_mul_hi_u32 v14, v4, v11
+; CHECK-NEXT:    v_mul_hi_u32 v11, v8, v11
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; CHECK-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
+; CHECK-NEXT:    v_addc_u32_e64 v12, s[4:5], v8, v11, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v10, v10, v4
+; CHECK-NEXT:    v_mul_lo_u32 v13, v9, v12
+; CHECK-NEXT:    v_mul_lo_u32 v14, v9, v4
+; CHECK-NEXT:    v_mul_hi_u32 v9, v9, v4
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
+; CHECK-NEXT:    v_mul_hi_u32 v11, v4, v14
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v9
+; CHECK-NEXT:    v_mul_lo_u32 v10, v12, v14
+; CHECK-NEXT:    v_mul_lo_u32 v13, v4, v9
+; CHECK-NEXT:    v_mul_hi_u32 v14, v12, v14
+; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CHECK-NEXT:    v_mul_lo_u32 v11, v12, v9
+; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v13, v10
+; CHECK-NEXT:    v_mul_hi_u32 v13, v4, v9
+; CHECK-NEXT:    v_mul_hi_u32 v9, v12, v9
+; CHECK-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; CHECK-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v11
+; CHECK-NEXT:    v_addc_u32_e32 v8, vcc, v8, v9, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; CHECK-NEXT:    v_addc_u32_e32 v8, vcc, 0, v8, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v4
+; CHECK-NEXT:    v_mul_lo_u32 v10, v6, v8
+; CHECK-NEXT:    v_mul_hi_u32 v11, v6, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v4
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v11, v1, v8
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; CHECK-NEXT:    v_mul_hi_u32 v10, v6, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v1, v8
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CHECK-NEXT:    v_mul_lo_u32 v9, v3, v4
+; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v8
+; CHECK-NEXT:    v_mul_lo_u32 v10, v5, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, v6, v10
+; CHECK-NEXT:    v_subb_u32_e64 v8, s[4:5], v1, v4, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v4
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v8, v3
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, v4, v9, s[4:5]
+; CHECK-NEXT:    v_sub_i32_e32 v9, vcc, v6, v5
+; CHECK-NEXT:    v_subbrev_u32_e64 v10, s[4:5], 0, v1, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v5
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v10, v3
+; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v9, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, v11, v12, s[4:5]
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v10, v1, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v7
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v7
+; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v3, v7
+; CHECK-NEXT:    v_subb_u32_e32 v5, vcc, v1, v7, vcc
+; CHECK-NEXT:  BB0_2: ; %Flow
+; CHECK-NEXT:    s_or_saveexec_b64 s[8:9], s[6:7]
+; CHECK-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; CHECK-NEXT:    s_cbranch_execz BB0_4
+; CHECK-NEXT:  ; %bb.3:
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v2
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v1
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_lo_u32 v3, v1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v2
+; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, 0, v3
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v1
+; CHECK-NEXT:    v_mov_b32_e32 v5, 0
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v1, v3
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v2
+; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v3, v2
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v2
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[6:7], v3, v2
+; CHECK-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, v4, v0, s[4:5]
+; CHECK-NEXT:  BB0_4:
+; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
+; CHECK-NEXT:    v_mov_b32_e32 v0, v4
+; CHECK-NEXT:    v_mov_b32_e32 v1, v5
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %result = srem i64 %num, %den
+  ret i64 %result
+}
+
+; FIXME: This is a workaround for not handling uniform VGPR case.
+declare i32 @llvm.amdgcn.readfirstlane(i32)
+
+define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
+; CHECK-LABEL: s_srem_i64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_or_b64 s[0:1], s[2:3], s[4:5]
+; CHECK-NEXT:    s_mov_b32 s6, 0
+; CHECK-NEXT:    s_mov_b32 s7, -1
+; CHECK-NEXT:    s_and_b64 s[0:1], s[0:1], s[6:7]
+; CHECK-NEXT:    v_cmp_eq_u64_e64 s[6:7], s[0:1], 0
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[8:9], 0, 1
+; CHECK-NEXT:    s_mov_b32 s0, 1
+; CHECK-NEXT:    s_xor_b64 vcc, s[6:7], s[8:9]
+; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; CHECK-NEXT:    s_cbranch_vccz BB1_2
+; CHECK-NEXT:  ; %bb.1:
+; CHECK-NEXT:    s_ashr_i32 s0, s5, 31
+; CHECK-NEXT:    s_ashr_i32 s6, s3, 31
+; CHECK-NEXT:    s_add_u32 s8, s2, s6
+; CHECK-NEXT:    s_cselect_b32 s7, 1, 0
+; CHECK-NEXT:    s_and_b32 s7, s7, 1
+; CHECK-NEXT:    s_cmp_lg_u32 s7, 0
+; CHECK-NEXT:    s_addc_u32 s9, s3, s6
+; CHECK-NEXT:    s_add_u32 s10, s4, s0
+; CHECK-NEXT:    s_cselect_b32 s3, 1, 0
+; CHECK-NEXT:    s_and_b32 s3, s3, 1
+; CHECK-NEXT:    s_cmp_lg_u32 s3, 0
+; CHECK-NEXT:    s_mov_b32 s1, s0
+; CHECK-NEXT:    s_addc_u32 s11, s5, s0
+; CHECK-NEXT:    s_xor_b64 s[10:11], s[10:11], s[0:1]
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s10
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, s11
+; CHECK-NEXT:    s_mov_b32 s7, s6
+; CHECK-NEXT:    s_xor_b64 s[8:9], s[8:9], s[6:7]
+; CHECK-NEXT:    s_sub_u32 s3, 0, s10
+; CHECK-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CHECK-NEXT:    s_cselect_b32 s0, 1, 0
+; CHECK-NEXT:    s_and_b32 s0, s0, 1
+; CHECK-NEXT:    s_cmp_lg_u32 s0, 0
+; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
+; CHECK-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
+; CHECK-NEXT:    v_trunc_f32_e32 v1, v1
+; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; CHECK-NEXT:    s_subb_u32 s5, 0, s11
+; CHECK-NEXT:    v_mul_lo_u32 v2, s5, v0
+; CHECK-NEXT:    v_mul_lo_u32 v3, s3, v1
+; CHECK-NEXT:    v_mul_hi_u32 v5, s3, v0
+; CHECK-NEXT:    v_mul_lo_u32 v4, s3, v0
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT:    v_mul_lo_u32 v3, v1, v4
+; CHECK-NEXT:    v_mul_lo_u32 v5, v0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v6, v0, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v4
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v6, v1, v2
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
+; CHECK-NEXT:    v_mul_hi_u32 v5, v0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
+; CHECK-NEXT:    v_addc_u32_e64 v3, s[0:1], v1, v2, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v4, s5, v0
+; CHECK-NEXT:    v_mul_lo_u32 v5, s3, v3
+; CHECK-NEXT:    v_mul_hi_u32 v7, s3, v0
+; CHECK-NEXT:    v_mul_lo_u32 v6, s3, v0
+; CHECK-NEXT:    v_add_i32_e64 v1, s[0:1], v1, v2
+; CHECK-NEXT:    v_add_i32_e64 v4, s[0:1], v4, v5
+; CHECK-NEXT:    v_add_i32_e64 v4, s[0:1], v4, v7
+; CHECK-NEXT:    v_mul_lo_u32 v5, v3, v6
+; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v4
+; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v6
+; CHECK-NEXT:    v_mul_hi_u32 v6, v3, v6
+; CHECK-NEXT:    v_add_i32_e64 v5, s[0:1], v5, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[0:1]
+; CHECK-NEXT:    v_add_i32_e64 v2, s[0:1], v5, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
+; CHECK-NEXT:    v_mul_lo_u32 v5, v3, v4
+; CHECK-NEXT:    v_add_i32_e64 v2, s[0:1], v7, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v4
+; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v4
+; CHECK-NEXT:    v_add_i32_e64 v5, s[0:1], v5, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[0:1]
+; CHECK-NEXT:    v_add_i32_e64 v5, s[0:1], v5, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[0:1]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[0:1], v6, v7
+; CHECK-NEXT:    v_add_i32_e64 v2, s[0:1], v5, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s[0:1]
+; CHECK-NEXT:    v_add_i32_e64 v4, s[0:1], v6, v5
+; CHECK-NEXT:    v_add_i32_e64 v3, s[0:1], v3, v4
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v2, s9, v0
+; CHECK-NEXT:    v_mul_lo_u32 v3, s8, v1
+; CHECK-NEXT:    v_mul_hi_u32 v4, s8, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, s9, v0
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v4, s9, v1
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_mul_hi_u32 v3, s8, v1
+; CHECK-NEXT:    v_mul_hi_u32 v1, s9, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v2, s11, v0
+; CHECK-NEXT:    v_mul_lo_u32 v1, s10, v1
+; CHECK-NEXT:    v_mul_lo_u32 v3, s10, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, s10, v0
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, s8, v3
+; CHECK-NEXT:    v_mov_b32_e32 v1, s9
+; CHECK-NEXT:    v_subb_u32_e64 v1, s[0:1], v1, v0, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[0:1], s9, v0
+; CHECK-NEXT:    v_mov_b32_e32 v3, s11
+; CHECK-NEXT:    v_subb_u32_e32 v0, vcc, v0, v3, vcc
+; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, s10, v2
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v1
+; CHECK-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[0:1]
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v1
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s11, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, v4, v5, s[0:1]
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s10, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s11, v0
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
+; CHECK-NEXT:    v_subrev_i32_e32 v4, vcc, s10, v3
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v0, s6, v0
+; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s6, v0
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:  BB1_2: ; %Flow
+; CHECK-NEXT:    s_and_b32 s0, s0, 1
+; CHECK-NEXT:    s_cmp_lg_u32 s0, 0
+; CHECK-NEXT:    s_cbranch_scc0 BB1_4
+; CHECK-NEXT:  ; %bb.3:
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s4
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CHECK-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CHECK-NEXT:    v_mul_lo_u32 v1, v0, s4
+; CHECK-NEXT:    v_mul_hi_u32 v2, v0, s4
+; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT:    v_add_i32_e64 v2, s[0:1], v0, v1
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[0:1], v0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v0, v0, s2
+; CHECK-NEXT:    v_mul_lo_u32 v0, v0, s4
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, s2, v0
+; CHECK-NEXT:    v_add_i32_e64 v2, s[0:1], s4, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[0:1], s2, v0
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
+; CHECK-NEXT:    v_subrev_i32_e64 v0, s[2:3], s4, v1
+; CHECK-NEXT:    s_and_b64 vcc, vcc, s[0:1]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[0:1]
+; CHECK-NEXT:  BB1_4:
+; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
+; CHECK-NEXT:    s_mov_b32 s1, s0
+; CHECK-NEXT:    ; return to shader part epilog
+  %result = srem i64 %num, %den
+  %cast = bitcast i64 %result to <2 x i32>
+  %elt.0 = extractelement <2 x i32> %cast, i32 0
+  %elt.1 = extractelement <2 x i32> %cast, i32 1
+  %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0)
+  %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1)
+  %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0
+  %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1
+  %cast.back = bitcast <2 x i32> %ins.1 to i64
+  ret i64 %cast.back
+}
+
+define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
+; GISEL-LABEL: v_srem_v2i64:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_ashrrev_i32_e32 v8, 31, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v5, v5, v8
+; GISEL-NEXT:    v_xor_b32_e32 v4, v4, v8
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v4
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v5
+; GISEL-NEXT:    v_ashrrev_i32_e32 v10, 31, v1
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v10, vcc
+; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v9
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
+; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, 0, v4
+; GISEL-NEXT:    v_subb_u32_e32 v12, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v10
+; GISEL-NEXT:    v_mul_f32_e32 v8, 0x5f7ffffc, v8
+; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v8
+; GISEL-NEXT:    v_trunc_f32_e32 v9, v9
+; GISEL-NEXT:    v_mac_f32_e32 v8, 0xcf800000, v9
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v10
+; GISEL-NEXT:    v_mul_lo_u32 v13, v12, v8
+; GISEL-NEXT:    v_mul_lo_u32 v14, v11, v9
+; GISEL-NEXT:    v_mul_hi_u32 v16, v11, v8
+; GISEL-NEXT:    v_mul_lo_u32 v15, v11, v8
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
+; GISEL-NEXT:    v_mul_lo_u32 v14, v9, v15
+; GISEL-NEXT:    v_mul_lo_u32 v16, v8, v13
+; GISEL-NEXT:    v_mul_hi_u32 v17, v8, v15
+; GISEL-NEXT:    v_mul_hi_u32 v15, v9, v15
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v13
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v16, v14
+; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v13
+; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v13
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v17, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v17, v16
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v14
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[4:5], v9, v13, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v12, v12, v8
+; GISEL-NEXT:    v_mul_lo_u32 v15, v11, v14
+; GISEL-NEXT:    v_mul_lo_u32 v16, v11, v8
+; GISEL-NEXT:    v_mul_hi_u32 v11, v11, v8
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v15
+; GISEL-NEXT:    v_mul_hi_u32 v13, v8, v16
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; GISEL-NEXT:    v_mul_lo_u32 v12, v14, v16
+; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v11
+; GISEL-NEXT:    v_mul_hi_u32 v16, v14, v16
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v13, v14, v11
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v15, v12
+; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v11
+; GISEL-NEXT:    v_mul_hi_u32 v11, v14, v11
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v16, v15
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v15, v13
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v11, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v8
+; GISEL-NEXT:    v_mul_lo_u32 v12, v0, v9
+; GISEL-NEXT:    v_mul_hi_u32 v13, v0, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v1, v8
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v13, v1, v9
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v13, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v8
+; GISEL-NEXT:    v_mul_lo_u32 v9, v4, v9
+; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v4, v8
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v12
+; GISEL-NEXT:    v_subb_u32_e64 v9, s[4:5], v1, v8, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v8
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v5
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, v11, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, v0, v4
+; GISEL-NEXT:    v_subbrev_u32_e64 v12, s[4:5], 0, v1, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v12, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v4
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v11, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v12, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[4:5]
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v11, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v12, v1, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v4
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v7, v4, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v5, v5, v4
+; GISEL-NEXT:    v_xor_b32_e32 v4, v6, v4
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v5
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v7, v4
+; GISEL-NEXT:    v_ashrrev_i32_e32 v8, 31, v3
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v8, vcc
+; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v7
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v5
+; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v4, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v10
+; GISEL-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v6
+; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v6
+; GISEL-NEXT:    v_trunc_f32_e32 v7, v7
+; GISEL-NEXT:    v_mac_f32_e32 v6, 0xcf800000, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v8
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v8
+; GISEL-NEXT:    v_mul_lo_u32 v12, v11, v6
+; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v7
+; GISEL-NEXT:    v_mul_hi_u32 v15, v9, v6
+; GISEL-NEXT:    v_mul_lo_u32 v14, v9, v6
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v10
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; GISEL-NEXT:    v_mul_lo_u32 v13, v7, v14
+; GISEL-NEXT:    v_mul_lo_u32 v15, v6, v12
+; GISEL-NEXT:    v_mul_hi_u32 v16, v6, v14
+; GISEL-NEXT:    v_mul_hi_u32 v14, v7, v14
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v16, v7, v12
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
+; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v7, v12
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v16, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v13
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[4:5], v7, v12, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v6
+; GISEL-NEXT:    v_mul_lo_u32 v14, v9, v13
+; GISEL-NEXT:    v_mul_lo_u32 v15, v9, v6
+; GISEL-NEXT:    v_mul_hi_u32 v9, v9, v6
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v12
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; GISEL-NEXT:    v_mul_hi_u32 v12, v6, v15
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
+; GISEL-NEXT:    v_mul_lo_u32 v11, v13, v15
+; GISEL-NEXT:    v_mul_lo_u32 v14, v6, v9
+; GISEL-NEXT:    v_mul_hi_u32 v15, v13, v15
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v12, v13, v9
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v14, v11
+; GISEL-NEXT:    v_mul_hi_u32 v14, v6, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v9
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v15, v14
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v12
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
+; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v7, v9, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
+; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
+; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v6
+; GISEL-NEXT:    v_mul_lo_u32 v11, v2, v7
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v10, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v10, v3, v7
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_mul_hi_u32 v11, v2, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v3, v7
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_mul_lo_u32 v9, v4, v6
+; GISEL-NEXT:    v_mul_lo_u32 v7, v5, v7
+; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v5, v6
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
+; GISEL-NEXT:    v_subb_u32_e64 v7, s[4:5], v3, v6, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v6
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v7, v4
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, v6, v9, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v2, v5
+; GISEL-NEXT:    v_subbrev_u32_e64 v10, s[4:5], 0, v3, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v5
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v10, v4
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v9, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, v11, v12, s[4:5]
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v9, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v10, v3, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v8
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v8
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v8, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_srem_v2i64:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    v_mov_b32_e32 v9, v1
+; CGP-NEXT:    v_mov_b32_e32 v8, v0
+; CGP-NEXT:    v_or_b32_e32 v1, v9, v5
+; CGP-NEXT:    v_mov_b32_e32 v0, 0
+; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execz BB2_2
+; CGP-NEXT:  ; %bb.1:
+; CGP-NEXT:    v_ashrrev_i32_e32 v0, 31, v5
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v4, v0
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v0, vcc
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v0
+; CGP-NEXT:    v_xor_b32_e32 v0, v5, v0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v1
+; CGP-NEXT:    v_cvt_f32_u32_e32 v10, v0
+; CGP-NEXT:    v_ashrrev_i32_e32 v11, 31, v9
+; CGP-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v10
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v8, v11
+; CGP-NEXT:    v_addc_u32_e32 v9, vcc, v9, v11, vcc
+; CGP-NEXT:    v_sub_i32_e32 v13, vcc, 0, v1
+; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; CGP-NEXT:    v_mul_f32_e32 v12, 0x2f800000, v5
+; CGP-NEXT:    v_trunc_f32_e32 v12, v12
+; CGP-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v12
+; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v12, v12
+; CGP-NEXT:    v_subb_u32_e32 v14, vcc, 0, v0, vcc
+; CGP-NEXT:    v_xor_b32_e32 v10, v10, v11
+; CGP-NEXT:    v_mul_lo_u32 v15, v14, v5
+; CGP-NEXT:    v_mul_lo_u32 v16, v13, v12
+; CGP-NEXT:    v_mul_hi_u32 v18, v13, v5
+; CGP-NEXT:    v_mul_lo_u32 v17, v13, v5
+; CGP-NEXT:    v_xor_b32_e32 v9, v9, v11
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v15, v18
+; CGP-NEXT:    v_mul_lo_u32 v16, v12, v17
+; CGP-NEXT:    v_mul_lo_u32 v18, v5, v15
+; CGP-NEXT:    v_mul_hi_u32 v19, v5, v17
+; CGP-NEXT:    v_mul_hi_u32 v17, v12, v17
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v16, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v16, v19
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v19, v12, v15
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v18, v16
+; CGP-NEXT:    v_mul_hi_u32 v18, v5, v15
+; CGP-NEXT:    v_mul_hi_u32 v15, v12, v15
+; CGP-NEXT:    v_add_i32_e32 v17, vcc, v19, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v17, vcc, v17, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v18, vcc, v19, v18
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v17, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v17, vcc, v18, v17
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v15, v17
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v16
+; CGP-NEXT:    v_addc_u32_e64 v16, s[4:5], v12, v15, vcc
+; CGP-NEXT:    v_mul_lo_u32 v14, v14, v5
+; CGP-NEXT:    v_mul_lo_u32 v17, v13, v16
+; CGP-NEXT:    v_mul_lo_u32 v18, v13, v5
+; CGP-NEXT:    v_mul_hi_u32 v13, v13, v5
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v15
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v17
+; CGP-NEXT:    v_mul_hi_u32 v15, v5, v18
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; CGP-NEXT:    v_mul_lo_u32 v14, v16, v18
+; CGP-NEXT:    v_mul_lo_u32 v17, v5, v13
+; CGP-NEXT:    v_mul_hi_u32 v18, v16, v18
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v15, v16, v13
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v17, v14
+; CGP-NEXT:    v_mul_hi_u32 v17, v5, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v16, v13
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v17, s[4:5], v18, v17
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v15, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v17, v15
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, v12, v13, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v14
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, 0, v12, vcc
+; CGP-NEXT:    v_mul_lo_u32 v13, v9, v5
+; CGP-NEXT:    v_mul_lo_u32 v14, v10, v12
+; CGP-NEXT:    v_mul_hi_u32 v15, v10, v5
+; CGP-NEXT:    v_mul_hi_u32 v5, v9, v5
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v15, v9, v12
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
+; CGP-NEXT:    v_mul_hi_u32 v14, v10, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v9, v12
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v15, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; CGP-NEXT:    v_mul_lo_u32 v13, v0, v5
+; CGP-NEXT:    v_mul_lo_u32 v12, v1, v12
+; CGP-NEXT:    v_mul_lo_u32 v14, v1, v5
+; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v12, v5
+; CGP-NEXT:    v_sub_i32_e32 v10, vcc, v10, v14
+; CGP-NEXT:    v_subb_u32_e64 v12, s[4:5], v9, v5, vcc
+; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v9, v5
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v12, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v1
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v12, v0
+; CGP-NEXT:    v_subb_u32_e32 v5, vcc, v5, v0, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v9, v9, v13, s[4:5]
+; CGP-NEXT:    v_sub_i32_e32 v13, vcc, v10, v1
+; CGP-NEXT:    v_subbrev_u32_e64 v14, s[4:5], 0, v5, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v14, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v13, v1
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v14, v0
+; CGP-NEXT:    v_subb_u32_e32 v0, vcc, v5, v0, vcc
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v13, v1
+; CGP-NEXT:    v_cndmask_b32_e64 v15, v15, v16, s[4:5]
+; CGP-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v15
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v13, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v14, v0, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v10, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v12, v0, vcc
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v11
+; CGP-NEXT:    v_xor_b32_e32 v5, v0, v11
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v1, v11
+; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v5, v11, vcc
+; CGP-NEXT:  BB2_2: ; %Flow2
+; CGP-NEXT:    s_or_saveexec_b64 s[8:9], s[6:7]
+; CGP-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; CGP-NEXT:    s_cbranch_execz BB2_4
+; CGP-NEXT:  ; %bb.3:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
+; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_lo_u32 v1, v0, v4
+; CGP-NEXT:    v_mul_hi_u32 v5, v0, v4
+; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v1
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; CGP-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v0, v1
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; CGP-NEXT:    v_mul_hi_u32 v0, v0, v8
+; CGP-NEXT:    v_mul_lo_u32 v0, v0, v4
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v8, v0
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v1, v4
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v0
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v4
+; CGP-NEXT:    v_sub_i32_e64 v0, s[6:7], v1, v4
+; CGP-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; CGP-NEXT:    v_mov_b32_e32 v1, 0
+; CGP-NEXT:  BB2_4:
+; CGP-NEXT:    s_or_b64 exec, exec, s[8:9]
+; CGP-NEXT:    v_or_b32_e32 v5, v3, v7
+; CGP-NEXT:    v_mov_b32_e32 v4, 0
+; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execz BB2_6
+; CGP-NEXT:  ; %bb.5:
+; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v7
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v4
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, v7, v4, vcc
+; CGP-NEXT:    v_xor_b32_e32 v5, v5, v4
+; CGP-NEXT:    v_xor_b32_e32 v4, v7, v4
+; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v5
+; CGP-NEXT:    v_cvt_f32_u32_e32 v8, v4
+; CGP-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
+; CGP-NEXT:    v_mac_f32_e32 v7, 0x4f800000, v8
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v2, v9
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
+; CGP-NEXT:    v_sub_i32_e32 v11, vcc, 0, v5
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x5f7ffffc, v7
+; CGP-NEXT:    v_mul_f32_e32 v10, 0x2f800000, v7
+; CGP-NEXT:    v_trunc_f32_e32 v10, v10
+; CGP-NEXT:    v_mac_f32_e32 v7, 0xcf800000, v10
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v10, v10
+; CGP-NEXT:    v_subb_u32_e32 v12, vcc, 0, v4, vcc
+; CGP-NEXT:    v_xor_b32_e32 v8, v8, v9
+; CGP-NEXT:    v_mul_lo_u32 v13, v12, v7
+; CGP-NEXT:    v_mul_lo_u32 v14, v11, v10
+; CGP-NEXT:    v_mul_hi_u32 v16, v11, v7
+; CGP-NEXT:    v_mul_lo_u32 v15, v11, v7
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v9
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
+; CGP-NEXT:    v_mul_lo_u32 v14, v10, v15
+; CGP-NEXT:    v_mul_lo_u32 v16, v7, v13
+; CGP-NEXT:    v_mul_hi_u32 v17, v7, v15
+; CGP-NEXT:    v_mul_hi_u32 v15, v10, v15
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v17, v10, v13
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v16, v14
+; CGP-NEXT:    v_mul_hi_u32 v16, v7, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v10, v13
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v17, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v17, v16
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v14
+; CGP-NEXT:    v_addc_u32_e64 v14, s[4:5], v10, v13, vcc
+; CGP-NEXT:    v_mul_lo_u32 v12, v12, v7
+; CGP-NEXT:    v_mul_lo_u32 v15, v11, v14
+; CGP-NEXT:    v_mul_lo_u32 v16, v11, v7
+; CGP-NEXT:    v_mul_hi_u32 v11, v11, v7
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v15
+; CGP-NEXT:    v_mul_hi_u32 v13, v7, v16
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; CGP-NEXT:    v_mul_lo_u32 v12, v14, v16
+; CGP-NEXT:    v_mul_lo_u32 v15, v7, v11
+; CGP-NEXT:    v_mul_hi_u32 v16, v14, v16
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v13, v14, v11
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v15, v12
+; CGP-NEXT:    v_mul_hi_u32 v15, v7, v11
+; CGP-NEXT:    v_mul_hi_u32 v11, v14, v11
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v16, v15
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v15, v13
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; CGP-NEXT:    v_addc_u32_e32 v10, vcc, v10, v11, vcc
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
+; CGP-NEXT:    v_addc_u32_e32 v10, vcc, 0, v10, vcc
+; CGP-NEXT:    v_mul_lo_u32 v11, v3, v7
+; CGP-NEXT:    v_mul_lo_u32 v12, v8, v10
+; CGP-NEXT:    v_mul_hi_u32 v13, v8, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, v3, v7
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v13, v3, v10
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_mul_hi_u32 v12, v8, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v3, v10
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CGP-NEXT:    v_mul_lo_u32 v11, v4, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, v5, v10
+; CGP-NEXT:    v_mul_lo_u32 v12, v5, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, v5, v7
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
+; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v8, v12
+; CGP-NEXT:    v_subb_u32_e64 v10, s[4:5], v3, v7, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v7
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v10, v4
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v7, v7, v11, s[4:5]
+; CGP-NEXT:    v_sub_i32_e32 v11, vcc, v8, v5
+; CGP-NEXT:    v_subbrev_u32_e64 v12, s[4:5], 0, v3, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v12, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v5
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v12, v4
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v11, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[4:5]
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v11, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v10, v3, vcc
+; CGP-NEXT:    v_xor_b32_e32 v4, v4, v9
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v9
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v4, v9
+; CGP-NEXT:    v_subb_u32_e32 v5, vcc, v3, v9, vcc
+; CGP-NEXT:  BB2_6: ; %Flow
+; CGP-NEXT:    s_or_saveexec_b64 s[8:9], s[6:7]
+; CGP-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; CGP-NEXT:    s_cbranch_execz BB2_8
+; CGP-NEXT:  ; %bb.7:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v6
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_lo_u32 v4, v3, v6
+; CGP-NEXT:    v_mul_hi_u32 v5, v3, v6
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v4
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
+; CGP-NEXT:    v_mul_hi_u32 v4, v4, v3
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CGP-NEXT:    v_mul_hi_u32 v3, v3, v2
+; CGP-NEXT:    v_mul_lo_u32 v3, v3, v6
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v2, v3
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v4, v6
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v3
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v6
+; CGP-NEXT:    v_sub_i32_e64 v2, s[6:7], v4, v6
+; CGP-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v4, v5, v2, s[4:5]
+; CGP-NEXT:    v_mov_b32_e32 v5, 0
+; CGP-NEXT:  BB2_8:
+; CGP-NEXT:    s_or_b64 exec, exec, s[8:9]
+; CGP-NEXT:    v_mov_b32_e32 v2, v4
+; CGP-NEXT:    v_mov_b32_e32 v3, v5
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %result = srem <2 x i64> %num, %den
+  ret <2 x i64> %result
+}
+
+define i64 @v_srem_i64_pow2k_denom(i64 %num) {
+; CHECK-LABEL: v_srem_i64_pow2k_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_movk_i32 s6, 0x1000
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s6
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, 0
+; CHECK-NEXT:    s_mov_b32 s7, 0xfffff000
+; CHECK-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v4
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
+; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
+; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT:    v_mul_lo_u32 v5, -1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v3
+; CHECK-NEXT:    v_mul_hi_u32 v8, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v7, s7, v2
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v7
+; CHECK-NEXT:    v_mul_lo_u32 v8, v2, v5
+; CHECK-NEXT:    v_mul_hi_u32 v9, v2, v7
+; CHECK-NEXT:    v_mul_hi_u32 v7, v3, v7
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v9, v3, v5
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v5
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; CHECK-NEXT:    v_addc_u32_e64 v6, s[4:5], v3, v5, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v7, -1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, s7, v6
+; CHECK-NEXT:    v_mul_hi_u32 v10, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v9, s7, v2
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v5
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v10
+; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v9
+; CHECK-NEXT:    v_mul_lo_u32 v10, v2, v7
+; CHECK-NEXT:    v_mul_hi_u32 v5, v2, v9
+; CHECK-NEXT:    v_mul_hi_u32 v9, v6, v9
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v8, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s[4:5]
+; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v7
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v10, v5
+; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v7
+; CHECK-NEXT:    v_mul_hi_u32 v6, v6, v7
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v8, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v8
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v6, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v5, v1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, v0, v3
+; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v7, v1, v3
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; CHECK-NEXT:    v_mul_hi_u32 v6, v0, v3
+; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; CHECK-NEXT:    v_mul_lo_u32 v5, 0, v2
+; CHECK-NEXT:    v_mul_lo_u32 v3, s6, v3
+; CHECK-NEXT:    v_mul_lo_u32 v6, s6, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, s6, v2
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v1, v2, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v2
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
+; CHECK-NEXT:    v_subrev_i32_e32 v5, vcc, s6, v0
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
+; CHECK-NEXT:    v_subrev_i32_e32 v7, vcc, s6, v5
+; CHECK-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; CHECK-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v4
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %result = srem i64 %num, 4096
+  ret i64 %result
+}
+
+define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
+; GISEL-LABEL: v_srem_v2i64_pow2k_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_movk_i32 s10, 0x1000
+; GISEL-NEXT:    s_add_u32 s4, s10, 0
+; GISEL-NEXT:    s_cselect_b32 s5, 1, 0
+; GISEL-NEXT:    s_and_b32 s5, s5, 1
+; GISEL-NEXT:    s_cmp_lg_u32 s5, 0
+; GISEL-NEXT:    s_mov_b32 s6, 0
+; GISEL-NEXT:    s_mov_b32 s7, s6
+; GISEL-NEXT:    s_addc_u32 s5, 0, 0
+; GISEL-NEXT:    s_xor_b64 s[8:9], s[4:5], s[6:7]
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s8
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, s9
+; GISEL-NEXT:    s_sub_u32 s11, 0, s8
+; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
+; GISEL-NEXT:    s_and_b32 s4, s4, 1
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
+; GISEL-NEXT:    s_subb_u32 s12, 0, s9
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, s12, v4
+; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v5
+; GISEL-NEXT:    v_mul_hi_u32 v10, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, s11, v4
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v6
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT:    v_mul_lo_u32 v8, v5, v9
+; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v7
+; GISEL-NEXT:    v_mul_hi_u32 v11, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v7
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; GISEL-NEXT:    v_mul_hi_u32 v10, v4, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v5, v7
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], v5, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, s12, v4
+; GISEL-NEXT:    v_mul_lo_u32 v10, s11, v8
+; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, s11, v4
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v7
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v11
+; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v11
+; GISEL-NEXT:    v_mul_hi_u32 v11, v8, v11
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v10, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v9
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v12, v7
+; GISEL-NEXT:    v_mul_hi_u32 v12, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v9
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v10, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v10
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, v1, v4
+; GISEL-NEXT:    v_mul_lo_u32 v8, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v9, v0, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v5
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; GISEL-NEXT:    v_mul_hi_u32 v8, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT:    v_mul_lo_u32 v7, s9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v5, s8, v5
+; GISEL-NEXT:    v_mul_lo_u32 v8, s8, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, s8, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], v1, v4, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v4
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s9, v5
+; GISEL-NEXT:    v_mov_b32_e32 v4, s9
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s8, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s9, v5
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
+; GISEL-NEXT:    v_subrev_i32_e32 v8, vcc, s8, v0
+; GISEL-NEXT:    v_subbrev_u32_e64 v9, s[4:5], 0, v1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s9, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s8, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s9, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, v10, v11, s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s10, 0
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; GISEL-NEXT:    s_cselect_b32 s5, 1, 0
+; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s8, v8
+; GISEL-NEXT:    s_and_b32 s5, s5, 1
+; GISEL-NEXT:    s_cmp_lg_u32 s5, 0
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; GISEL-NEXT:    s_addc_u32 s5, 0, 0
+; GISEL-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, s7
+; GISEL-NEXT:    s_sub_u32 s8, 0, s6
+; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
+; GISEL-NEXT:    s_and_b32 s4, s4, 1
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
+; GISEL-NEXT:    s_subb_u32 s9, 0, s7
+; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, s9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v5
+; GISEL-NEXT:    v_mul_hi_u32 v11, s8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v10, s8, v4
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v6
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; GISEL-NEXT:    v_mul_lo_u32 v9, v5, v10
+; GISEL-NEXT:    v_mul_lo_u32 v11, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v12, v4, v10
+; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v10
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v7
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v8
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_mul_hi_u32 v11, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v5, v8
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], v5, v8, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v10, s9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, s8, v9
+; GISEL-NEXT:    v_mul_hi_u32 v13, s8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, s8, v4
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v8
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
+; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v13, v4, v10
+; GISEL-NEXT:    v_mul_hi_u32 v8, v4, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v10
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v13, v8
+; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v10
+; GISEL-NEXT:    v_mul_hi_u32 v9, v9, v10
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v11
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v9, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v5
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v6, v2, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v5
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
+; GISEL-NEXT:    v_mul_hi_u32 v9, v2, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT:    v_mul_lo_u32 v6, s7, v4
+; GISEL-NEXT:    v_mul_lo_u32 v5, s6, v5
+; GISEL-NEXT:    v_mul_lo_u32 v8, s6, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, s6, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
+; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], v3, v4, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v5
+; GISEL-NEXT:    v_mov_b32_e32 v4, s7
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s7, v5
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, v6, v8, s[4:5]
+; GISEL-NEXT:    v_subrev_i32_e32 v8, vcc, s6, v2
+; GISEL-NEXT:    v_subbrev_u32_e64 v9, s[4:5], 0, v3, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v9
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v8
+; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s6, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s7, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, v10, v11, s[4:5]
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v7
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v7
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_srem_v2i64_pow2k_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_movk_i32 s6, 0x1000
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, s6
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, 0
+; CGP-NEXT:    s_mov_b32 s7, 0xfffff000
+; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; CGP-NEXT:    v_mov_b32_e32 v7, v4
+; CGP-NEXT:    v_mac_f32_e32 v7, 0x4f800000, v5
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v6, vcc
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v6
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x5f7ffffc, v7
+; CGP-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v7
+; CGP-NEXT:    v_trunc_f32_e32 v8, v8
+; CGP-NEXT:    v_mac_f32_e32 v7, 0xcf800000, v8
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
+; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; CGP-NEXT:    v_mul_lo_u32 v9, -1, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, s7, v8
+; CGP-NEXT:    v_mul_hi_u32 v12, s7, v7
+; CGP-NEXT:    v_mul_lo_u32 v11, s7, v7
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; CGP-NEXT:    v_mul_lo_u32 v10, v8, v11
+; CGP-NEXT:    v_mul_lo_u32 v12, v7, v9
+; CGP-NEXT:    v_mul_hi_u32 v13, v7, v11
+; CGP-NEXT:    v_mul_hi_u32 v11, v8, v11
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v13, v8, v9
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; CGP-NEXT:    v_mul_hi_u32 v12, v7, v9
+; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v8, v9, vcc
+; CGP-NEXT:    v_mul_lo_u32 v11, -1, v7
+; CGP-NEXT:    v_mul_lo_u32 v12, s7, v10
+; CGP-NEXT:    v_mul_hi_u32 v14, s7, v7
+; CGP-NEXT:    v_mul_lo_u32 v13, s7, v7
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; CGP-NEXT:    v_mul_lo_u32 v12, v10, v13
+; CGP-NEXT:    v_mul_lo_u32 v14, v7, v11
+; CGP-NEXT:    v_mul_hi_u32 v9, v7, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v10, v13
+; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v3
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v12, v10, v11
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v14, v9
+; CGP-NEXT:    v_mul_hi_u32 v14, v7, v11
+; CGP-NEXT:    v_mul_hi_u32 v10, v10, v11
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v14
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v12
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v8, v10, vcc
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, 0, v8, vcc
+; CGP-NEXT:    v_mul_lo_u32 v9, v1, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, v0, v8
+; CGP-NEXT:    v_mul_hi_u32 v11, v0, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v11, v1, v8
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT:    v_mul_hi_u32 v10, v0, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v1, v8
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
+; CGP-NEXT:    v_mul_lo_u32 v8, s6, v8
+; CGP-NEXT:    v_mul_lo_u32 v10, s6, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, s6, v7
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
+; CGP-NEXT:    v_subb_u32_e64 v8, s[4:5], v1, v7, vcc
+; CGP-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v7
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
+; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v7, v7, v9, s[4:5]
+; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, s6, v0
+; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
+; CGP-NEXT:    v_subrev_i32_e32 v11, vcc, s6, v9
+; CGP-NEXT:    v_subbrev_u32_e32 v12, vcc, 0, v1, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; CGP-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v12, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v4
+; CGP-NEXT:    v_trunc_f32_e32 v7, v7
+; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v8, -1, v4
+; CGP-NEXT:    v_mul_lo_u32 v9, s7, v7
+; CGP-NEXT:    v_mul_hi_u32 v11, s7, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, s7, v4
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT:    v_mul_lo_u32 v9, v7, v10
+; CGP-NEXT:    v_mul_lo_u32 v11, v4, v8
+; CGP-NEXT:    v_mul_hi_u32 v12, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v7, v10
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v6
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v12, v7, v8
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT:    v_mul_hi_u32 v11, v4, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v7, v8
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; CGP-NEXT:    v_addc_u32_e64 v9, s[4:5], v7, v8, vcc
+; CGP-NEXT:    v_mul_lo_u32 v10, -1, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, s7, v9
+; CGP-NEXT:    v_mul_hi_u32 v13, s7, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, s7, v4
+; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
+; CGP-NEXT:    v_mul_lo_u32 v11, v9, v12
+; CGP-NEXT:    v_mul_lo_u32 v13, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v8, v4, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v9, v12
+; CGP-NEXT:    v_xor_b32_e32 v2, v2, v5
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v11, v9, v10
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v13, v8
+; CGP-NEXT:    v_mul_hi_u32 v13, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v9, v9, v10
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v11
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, v7, v9, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v5
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; CGP-NEXT:    v_mul_lo_u32 v8, v3, v4
+; CGP-NEXT:    v_mul_lo_u32 v9, v2, v7
+; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; CGP-NEXT:    v_mul_hi_u32 v6, v2, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v8, v3, v7
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
+; CGP-NEXT:    v_mul_hi_u32 v9, v2, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, v3, v7
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CGP-NEXT:    v_mul_lo_u32 v7, 0, v4
+; CGP-NEXT:    v_mul_lo_u32 v6, s6, v6
+; CGP-NEXT:    v_mul_lo_u32 v8, s6, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, s6, v4
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
+; CGP-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v2
+; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v6
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[4:5]
+; CGP-NEXT:    v_subrev_i32_e32 v7, vcc, s6, v2
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, s6, v7
+; CGP-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; CGP-NEXT:    v_cndmask_b32_e32 v7, v7, v9, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v10, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; CGP-NEXT:    v_xor_b32_e32 v2, v2, v5
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v5
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v5, vcc
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %result = srem <2 x i64> %num, <i64 4096, i64 4096>
+  ret <2 x i64> %result
+}
+
+define i64 @v_srem_i64_oddk_denom(i64 %num) {
+; CHECK-LABEL: v_srem_i64_oddk_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s6
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, 0
+; CHECK-NEXT:    s_mov_b32 s7, 0xffed2705
+; CHECK-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v4
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
+; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
+; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT:    v_mul_lo_u32 v5, -1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v3
+; CHECK-NEXT:    v_mul_hi_u32 v8, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v7, s7, v2
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v7
+; CHECK-NEXT:    v_mul_lo_u32 v8, v2, v5
+; CHECK-NEXT:    v_mul_hi_u32 v9, v2, v7
+; CHECK-NEXT:    v_mul_hi_u32 v7, v3, v7
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v9, v3, v5
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v5
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; CHECK-NEXT:    v_addc_u32_e64 v6, s[4:5], v3, v5, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v7, -1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, s7, v6
+; CHECK-NEXT:    v_mul_hi_u32 v10, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v9, s7, v2
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v5
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v10
+; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v9
+; CHECK-NEXT:    v_mul_lo_u32 v10, v2, v7
+; CHECK-NEXT:    v_mul_hi_u32 v5, v2, v9
+; CHECK-NEXT:    v_mul_hi_u32 v9, v6, v9
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v8, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s[4:5]
+; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v7
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v10, v5
+; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v7
+; CHECK-NEXT:    v_mul_hi_u32 v6, v6, v7
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v8, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v8
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v6, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v5, v1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, v0, v3
+; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v7, v1, v3
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; CHECK-NEXT:    v_mul_hi_u32 v6, v0, v3
+; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; CHECK-NEXT:    v_mul_lo_u32 v5, 0, v2
+; CHECK-NEXT:    v_mul_lo_u32 v3, s6, v3
+; CHECK-NEXT:    v_mul_lo_u32 v6, s6, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, s6, v2
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v1, v2, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v2
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
+; CHECK-NEXT:    v_subrev_i32_e32 v5, vcc, s6, v0
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
+; CHECK-NEXT:    v_subrev_i32_e32 v7, vcc, s6, v5
+; CHECK-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; CHECK-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v4
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %result = srem i64 %num, 1235195
+  ret i64 %result
+}
+
+define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
+; GISEL-LABEL: v_srem_v2i64_oddk_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s10, 0x12d8fb
+; GISEL-NEXT:    s_add_u32 s4, s10, 0
+; GISEL-NEXT:    s_cselect_b32 s5, 1, 0
+; GISEL-NEXT:    s_and_b32 s5, s5, 1
+; GISEL-NEXT:    s_cmp_lg_u32 s5, 0
+; GISEL-NEXT:    s_mov_b32 s6, 0
+; GISEL-NEXT:    s_mov_b32 s7, s6
+; GISEL-NEXT:    s_addc_u32 s5, 0, 0
+; GISEL-NEXT:    s_xor_b64 s[8:9], s[4:5], s[6:7]
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s8
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, s9
+; GISEL-NEXT:    s_sub_u32 s11, 0, s8
+; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
+; GISEL-NEXT:    s_and_b32 s4, s4, 1
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
+; GISEL-NEXT:    s_subb_u32 s12, 0, s9
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, s12, v4
+; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v5
+; GISEL-NEXT:    v_mul_hi_u32 v10, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, s11, v4
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v6
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT:    v_mul_lo_u32 v8, v5, v9
+; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v7
+; GISEL-NEXT:    v_mul_hi_u32 v11, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v7
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; GISEL-NEXT:    v_mul_hi_u32 v10, v4, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v5, v7
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], v5, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, s12, v4
+; GISEL-NEXT:    v_mul_lo_u32 v10, s11, v8
+; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, s11, v4
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v7
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v11
+; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v11
+; GISEL-NEXT:    v_mul_hi_u32 v11, v8, v11
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v10, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v9
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v12, v7
+; GISEL-NEXT:    v_mul_hi_u32 v12, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v9
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v10, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v10
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, v1, v4
+; GISEL-NEXT:    v_mul_lo_u32 v8, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v9, v0, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v5
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; GISEL-NEXT:    v_mul_hi_u32 v8, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT:    v_mul_lo_u32 v7, s9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v5, s8, v5
+; GISEL-NEXT:    v_mul_lo_u32 v8, s8, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, s8, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], v1, v4, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v4
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s9, v5
+; GISEL-NEXT:    v_mov_b32_e32 v4, s9
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s8, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s9, v5
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
+; GISEL-NEXT:    v_subrev_i32_e32 v8, vcc, s8, v0
+; GISEL-NEXT:    v_subbrev_u32_e64 v9, s[4:5], 0, v1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s9, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s8, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s9, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, v10, v11, s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s10, 0
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; GISEL-NEXT:    s_cselect_b32 s5, 1, 0
+; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s8, v8
+; GISEL-NEXT:    s_and_b32 s5, s5, 1
+; GISEL-NEXT:    s_cmp_lg_u32 s5, 0
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; GISEL-NEXT:    s_addc_u32 s5, 0, 0
+; GISEL-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, s7
+; GISEL-NEXT:    s_sub_u32 s8, 0, s6
+; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
+; GISEL-NEXT:    s_and_b32 s4, s4, 1
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
+; GISEL-NEXT:    s_subb_u32 s9, 0, s7
+; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
+; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, s9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v5
+; GISEL-NEXT:    v_mul_hi_u32 v11, s8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v10, s8, v4
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v6
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; GISEL-NEXT:    v_mul_lo_u32 v9, v5, v10
+; GISEL-NEXT:    v_mul_lo_u32 v11, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v12, v4, v10
+; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v10
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v7
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v8
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_mul_hi_u32 v11, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v5, v8
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; GISEL-NEXT:    v_addc_u32_e64 v9, s[4:5], v5, v8, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v10, s9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, s8, v9
+; GISEL-NEXT:    v_mul_hi_u32 v13, s8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, s8, v4
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v8
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
+; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v13, v4, v10
+; GISEL-NEXT:    v_mul_hi_u32 v8, v4, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v10
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v13, v8
+; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v10
+; GISEL-NEXT:    v_mul_hi_u32 v9, v9, v10
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v11
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v9, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v5
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v6, v2, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v5
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
+; GISEL-NEXT:    v_mul_hi_u32 v9, v2, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT:    v_mul_lo_u32 v6, s7, v4
+; GISEL-NEXT:    v_mul_lo_u32 v5, s6, v5
+; GISEL-NEXT:    v_mul_lo_u32 v8, s6, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, s6, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
+; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], v3, v4, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v5
+; GISEL-NEXT:    v_mov_b32_e32 v4, s7
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s7, v5
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, v6, v8, s[4:5]
+; GISEL-NEXT:    v_subrev_i32_e32 v8, vcc, s6, v2
+; GISEL-NEXT:    v_subbrev_u32_e64 v9, s[4:5], 0, v3, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v9
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v8
+; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s6, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], s7, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, v10, v11, s[4:5]
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v7
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v7
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_srem_v2i64_oddk_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_mov_b32 s6, 0x12d8fb
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, s6
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, 0
+; CGP-NEXT:    s_mov_b32 s7, 0xffed2705
+; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; CGP-NEXT:    v_mov_b32_e32 v7, v4
+; CGP-NEXT:    v_mac_f32_e32 v7, 0x4f800000, v5
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v6, vcc
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v6
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x5f7ffffc, v7
+; CGP-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v7
+; CGP-NEXT:    v_trunc_f32_e32 v8, v8
+; CGP-NEXT:    v_mac_f32_e32 v7, 0xcf800000, v8
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
+; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; CGP-NEXT:    v_mul_lo_u32 v9, -1, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, s7, v8
+; CGP-NEXT:    v_mul_hi_u32 v12, s7, v7
+; CGP-NEXT:    v_mul_lo_u32 v11, s7, v7
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; CGP-NEXT:    v_mul_lo_u32 v10, v8, v11
+; CGP-NEXT:    v_mul_lo_u32 v12, v7, v9
+; CGP-NEXT:    v_mul_hi_u32 v13, v7, v11
+; CGP-NEXT:    v_mul_hi_u32 v11, v8, v11
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v13, v8, v9
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; CGP-NEXT:    v_mul_hi_u32 v12, v7, v9
+; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v8, v9, vcc
+; CGP-NEXT:    v_mul_lo_u32 v11, -1, v7
+; CGP-NEXT:    v_mul_lo_u32 v12, s7, v10
+; CGP-NEXT:    v_mul_hi_u32 v14, s7, v7
+; CGP-NEXT:    v_mul_lo_u32 v13, s7, v7
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; CGP-NEXT:    v_mul_lo_u32 v12, v10, v13
+; CGP-NEXT:    v_mul_lo_u32 v14, v7, v11
+; CGP-NEXT:    v_mul_hi_u32 v9, v7, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v10, v13
+; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v3
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v12, v10, v11
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v14, v9
+; CGP-NEXT:    v_mul_hi_u32 v14, v7, v11
+; CGP-NEXT:    v_mul_hi_u32 v10, v10, v11
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v14
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v12
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v8, v10, vcc
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT:    v_addc_u32_e32 v8, vcc, 0, v8, vcc
+; CGP-NEXT:    v_mul_lo_u32 v9, v1, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, v0, v8
+; CGP-NEXT:    v_mul_hi_u32 v11, v0, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v11, v1, v8
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT:    v_mul_hi_u32 v10, v0, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v1, v8
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
+; CGP-NEXT:    v_mul_lo_u32 v8, s6, v8
+; CGP-NEXT:    v_mul_lo_u32 v10, s6, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, s6, v7
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
+; CGP-NEXT:    v_subb_u32_e64 v8, s[4:5], v1, v7, vcc
+; CGP-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v7
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
+; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v7, v7, v9, s[4:5]
+; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, s6, v0
+; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
+; CGP-NEXT:    v_subrev_i32_e32 v11, vcc, s6, v9
+; CGP-NEXT:    v_subbrev_u32_e32 v12, vcc, 0, v1, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; CGP-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v12, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v4
+; CGP-NEXT:    v_trunc_f32_e32 v7, v7
+; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v8, -1, v4
+; CGP-NEXT:    v_mul_lo_u32 v9, s7, v7
+; CGP-NEXT:    v_mul_hi_u32 v11, s7, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, s7, v4
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT:    v_mul_lo_u32 v9, v7, v10
+; CGP-NEXT:    v_mul_lo_u32 v11, v4, v8
+; CGP-NEXT:    v_mul_hi_u32 v12, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v7, v10
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v6
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v12, v7, v8
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT:    v_mul_hi_u32 v11, v4, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v7, v8
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; CGP-NEXT:    v_addc_u32_e64 v9, s[4:5], v7, v8, vcc
+; CGP-NEXT:    v_mul_lo_u32 v10, -1, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, s7, v9
+; CGP-NEXT:    v_mul_hi_u32 v13, s7, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, s7, v4
+; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
+; CGP-NEXT:    v_mul_lo_u32 v11, v9, v12
+; CGP-NEXT:    v_mul_lo_u32 v13, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v8, v4, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v9, v12
+; CGP-NEXT:    v_xor_b32_e32 v2, v2, v5
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v11, v9, v10
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v13, v8
+; CGP-NEXT:    v_mul_hi_u32 v13, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v9, v9, v10
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v11
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, v7, v9, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v5
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; CGP-NEXT:    v_mul_lo_u32 v8, v3, v4
+; CGP-NEXT:    v_mul_lo_u32 v9, v2, v7
+; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; CGP-NEXT:    v_mul_hi_u32 v6, v2, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v8, v3, v7
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
+; CGP-NEXT:    v_mul_hi_u32 v9, v2, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, v3, v7
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CGP-NEXT:    v_mul_lo_u32 v7, 0, v4
+; CGP-NEXT:    v_mul_lo_u32 v6, s6, v6
+; CGP-NEXT:    v_mul_lo_u32 v8, s6, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, s6, v4
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
+; CGP-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v2
+; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v6
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[4:5]
+; CGP-NEXT:    v_subrev_i32_e32 v7, vcc, s6, v2
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s6, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
+; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, s6, v7
+; CGP-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; CGP-NEXT:    v_cndmask_b32_e32 v7, v7, v9, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v10, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; CGP-NEXT:    v_xor_b32_e32 v2, v2, v5
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v5
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v5, vcc
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %result = srem <2 x i64> %num, <i64 1235195, i64 1235195>
+  ret <2 x i64> %result
+}
+
+define i64 @v_srem_i64_pow2_shl_denom(i64 %x, i64 %y) {
+; CHECK-LABEL: v_srem_i64_pow2_shl_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_movk_i32 s4, 0x1000
+; CHECK-NEXT:    s_mov_b32 s5, 0
+; CHECK-NEXT:    v_lshl_b64 v[4:5], s[4:5], v2
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    v_or_b32_e32 v3, v1, v5
+; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CHECK-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CHECK-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CHECK-NEXT:    s_cbranch_execz BB7_2
+; CHECK-NEXT:  ; %bb.1:
+; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v5
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v2
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v5, v2, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v2
+; CHECK-NEXT:    v_xor_b32_e32 v2, v5, v2
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v3
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v6, v2
+; CHECK-NEXT:    v_ashrrev_i32_e32 v7, 31, v1
+; CHECK-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v6
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v0, v7
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v7, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
+; CHECK-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; CHECK-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v5
+; CHECK-NEXT:    v_trunc_f32_e32 v8, v8
+; CHECK-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v8
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; CHECK-NEXT:    v_subb_u32_e32 v10, vcc, 0, v2, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v6, v6, v7
+; CHECK-NEXT:    v_mul_lo_u32 v11, v10, v5
+; CHECK-NEXT:    v_mul_lo_u32 v12, v9, v8
+; CHECK-NEXT:    v_mul_hi_u32 v14, v9, v5
+; CHECK-NEXT:    v_mul_lo_u32 v13, v9, v5
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v7
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v11, v14
+; CHECK-NEXT:    v_mul_lo_u32 v12, v8, v13
+; CHECK-NEXT:    v_mul_lo_u32 v14, v5, v11
+; CHECK-NEXT:    v_mul_hi_u32 v15, v5, v13
+; CHECK-NEXT:    v_mul_hi_u32 v13, v8, v13
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; CHECK-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v15, v8, v11
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
+; CHECK-NEXT:    v_mul_hi_u32 v14, v5, v11
+; CHECK-NEXT:    v_mul_hi_u32 v11, v8, v11
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; CHECK-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
+; CHECK-NEXT:    v_addc_u32_e64 v12, s[4:5], v8, v11, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v10, v10, v5
+; CHECK-NEXT:    v_mul_lo_u32 v13, v9, v12
+; CHECK-NEXT:    v_mul_lo_u32 v14, v9, v5
+; CHECK-NEXT:    v_mul_hi_u32 v9, v9, v5
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
+; CHECK-NEXT:    v_mul_hi_u32 v11, v5, v14
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v9
+; CHECK-NEXT:    v_mul_lo_u32 v10, v12, v14
+; CHECK-NEXT:    v_mul_lo_u32 v13, v5, v9
+; CHECK-NEXT:    v_mul_hi_u32 v14, v12, v14
+; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CHECK-NEXT:    v_mul_lo_u32 v11, v12, v9
+; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v13, v10
+; CHECK-NEXT:    v_mul_hi_u32 v13, v5, v9
+; CHECK-NEXT:    v_mul_hi_u32 v9, v12, v9
+; CHECK-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; CHECK-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v11
+; CHECK-NEXT:    v_addc_u32_e32 v8, vcc, v8, v9, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; CHECK-NEXT:    v_addc_u32_e32 v8, vcc, 0, v8, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v5
+; CHECK-NEXT:    v_mul_lo_u32 v10, v6, v8
+; CHECK-NEXT:    v_mul_hi_u32 v11, v6, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v1, v5
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v11, v1, v8
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; CHECK-NEXT:    v_mul_hi_u32 v10, v6, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v1, v8
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v11, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v5
+; CHECK-NEXT:    v_mul_lo_u32 v8, v3, v8
+; CHECK-NEXT:    v_mul_lo_u32 v10, v3, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v5
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, v6, v10
+; CHECK-NEXT:    v_subb_u32_e64 v8, s[4:5], v1, v5, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v5
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v8, v2
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v2, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
+; CHECK-NEXT:    v_sub_i32_e32 v9, vcc, v6, v3
+; CHECK-NEXT:    v_subbrev_u32_e64 v10, s[4:5], 0, v1, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v3
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v2, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v10, v2
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v9, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, v11, v12, s[4:5]
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v9, v2, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v10, v1, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v2, v2, v7
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v7
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v2, v7
+; CHECK-NEXT:    v_subb_u32_e32 v3, vcc, v1, v7, vcc
+; CHECK-NEXT:  BB7_2: ; %Flow
+; CHECK-NEXT:    s_or_saveexec_b64 s[8:9], s[6:7]
+; CHECK-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; CHECK-NEXT:    s_cbranch_execz BB7_4
+; CHECK-NEXT:  ; %bb.3:
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v4
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v1
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_lo_u32 v2, v1, v4
+; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v4
+; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v2, v2, v1
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v1, v2
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v4
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v2, v4
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[6:7], v2, v4
+; CHECK-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, v3, v0, s[4:5]
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0
+; CHECK-NEXT:  BB7_4:
+; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
+; CHECK-NEXT:    v_mov_b32_e32 v0, v2
+; CHECK-NEXT:    v_mov_b32_e32 v1, v3
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %shl.y = shl i64 4096, %y
+  %r = srem i64 %x, %shl.y
+  ret i64 %r
+}
+
+define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
+; GISEL-LABEL: v_srem_v2i64_pow2_shl_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_movk_i32 s6, 0x1000
+; GISEL-NEXT:    s_mov_b32 s7, 0
+; GISEL-NEXT:    v_lshl_b64 v[4:5], s[6:7], v4
+; GISEL-NEXT:    v_ashrrev_i32_e32 v9, 31, v1
+; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v7, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v5, v5, v7
+; GISEL-NEXT:    v_xor_b32_e32 v4, v4, v7
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v7, v4
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v5
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v9, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v4
+; GISEL-NEXT:    v_mac_f32_e32 v7, 0x4f800000, v8
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; GISEL-NEXT:    v_xor_b32_e32 v8, v0, v9
+; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v16, v1, v9
+; GISEL-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v7
+; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v0
+; GISEL-NEXT:    v_trunc_f32_e32 v7, v7
+; GISEL-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; GISEL-NEXT:    v_mul_lo_u32 v12, v11, v0
+; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v7
+; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v0
+; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v0
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; GISEL-NEXT:    v_mul_lo_u32 v13, v7, v14
+; GISEL-NEXT:    v_mul_lo_u32 v15, v0, v12
+; GISEL-NEXT:    v_mul_hi_u32 v1, v0, v14
+; GISEL-NEXT:    v_mul_hi_u32 v14, v7, v14
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v13, v7, v12
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v15, v1
+; GISEL-NEXT:    v_mul_hi_u32 v15, v0, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v7, v12
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; GISEL-NEXT:    v_addc_u32_e64 v1, s[4:5], v7, v12, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v0
+; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v1
+; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v0
+; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v0
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v12
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v14
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
+; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v14
+; GISEL-NEXT:    v_mul_lo_u32 v13, v0, v10
+; GISEL-NEXT:    v_mul_hi_u32 v14, v1, v14
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v12, v1, v10
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
+; GISEL-NEXT:    v_mul_hi_u32 v13, v0, v10
+; GISEL-NEXT:    v_mul_hi_u32 v1, v1, v10
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v13, v12
+; GISEL-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v10
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v7, v1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v0, v11
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v11, v16, v7
+; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v10
+; GISEL-NEXT:    v_lshl_b64 v[0:1], s[6:7], v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v8, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v16, v7
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v11, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v11, v16, v10
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
+; GISEL-NEXT:    v_mul_hi_u32 v12, v8, v10
+; GISEL-NEXT:    v_mul_hi_u32 v10, v16, v10
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
+; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v6
+; GISEL-NEXT:    v_mul_lo_u32 v7, v4, v7
+; GISEL-NEXT:    v_mul_lo_u32 v11, v4, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v4, v6
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v8, v11
+; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], v16, v6, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v16, v6
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v8, v5
+; GISEL-NEXT:    v_subb_u32_e32 v6, vcc, v6, v5, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, v10, v11, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, v7, v4
+; GISEL-NEXT:    v_subbrev_u32_e64 v12, s[4:5], 0, v6, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v12, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v12, v5
+; GISEL-NEXT:    v_subb_u32_e32 v5, vcc, v6, v5, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v11, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[4:5]
+; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v11, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v12, v5, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v8, v5, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v7, v0, v6
+; GISEL-NEXT:    v_xor_b32_e32 v6, v1, v6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, v7
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v1, v6
+; GISEL-NEXT:    v_ashrrev_i32_e32 v8, 31, v3
+; GISEL-NEXT:    v_xor_b32_e32 v4, v4, v9
+; GISEL-NEXT:    v_xor_b32_e32 v5, v5, v9
+; GISEL-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v2, v8
+; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v3, v8, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v3, v1, v8
+; GISEL-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
+; GISEL-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
+; GISEL-NEXT:    v_trunc_f32_e32 v1, v1
+; GISEL-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v7
+; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v6, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v12, v11, v0
+; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v1
+; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v0
+; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v0
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v8
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; GISEL-NEXT:    v_mul_lo_u32 v13, v1, v14
+; GISEL-NEXT:    v_mul_lo_u32 v15, v0, v12
+; GISEL-NEXT:    v_mul_hi_u32 v16, v0, v14
+; GISEL-NEXT:    v_mul_hi_u32 v14, v1, v14
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v12
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
+; GISEL-NEXT:    v_mul_hi_u32 v15, v0, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v1, v12
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v16, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[4:5], v1, v12, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v0
+; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v13
+; GISEL-NEXT:    v_mul_lo_u32 v15, v10, v0
+; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v0
+; GISEL-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v12
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v15
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
+; GISEL-NEXT:    v_mul_lo_u32 v11, v13, v15
+; GISEL-NEXT:    v_mul_lo_u32 v14, v0, v10
+; GISEL-NEXT:    v_mul_hi_u32 v15, v13, v15
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v12, v13, v10
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v14, v11
+; GISEL-NEXT:    v_mul_hi_u32 v14, v0, v10
+; GISEL-NEXT:    v_mul_hi_u32 v10, v13, v10
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v15, v14
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v12
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v12
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v10, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v0, v11
+; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v12, v2, v10
+; GISEL-NEXT:    v_mul_lo_u32 v13, v3, v11
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v10
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v5, v9, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v12, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v5, v2, v11
+; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v10
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
+; GISEL-NEXT:    v_mul_hi_u32 v9, v3, v11
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
+; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v11
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
+; GISEL-NEXT:    v_mul_lo_u32 v9, v6, v4
+; GISEL-NEXT:    v_mul_lo_u32 v5, v7, v5
+; GISEL-NEXT:    v_mul_lo_u32 v10, v7, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v7, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v3, v10
+; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], v2, v4, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v6
+; GISEL-NEXT:    v_subb_u32_e32 v2, vcc, v2, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v9, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v3, v7
+; GISEL-NEXT:    v_subbrev_u32_e64 v10, s[4:5], 0, v2, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v7
+; GISEL-NEXT:    v_subb_u32_e32 v2, vcc, v2, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v10, v6
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v9, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, v11, v12, s[4:5]
+; GISEL-NEXT:    v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
+; GISEL-NEXT:    v_cndmask_b32_e32 v6, v9, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v8
+; GISEL-NEXT:    v_xor_b32_e32 v4, v2, v8
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v3, v8
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v4, v8, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_srem_v2i64_pow2_shl_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_movk_i32 s4, 0x1000
+; CGP-NEXT:    s_mov_b32 s5, 0
+; CGP-NEXT:    v_lshl_b64 v[10:11], s[4:5], v4
+; CGP-NEXT:    v_mov_b32_e32 v7, v1
+; CGP-NEXT:    v_mov_b32_e32 v5, v0
+; CGP-NEXT:    v_or_b32_e32 v1, v7, v11
+; CGP-NEXT:    v_mov_b32_e32 v0, 0
+; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; CGP-NEXT:    v_lshl_b64 v[8:9], s[4:5], v6
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execz BB8_2
+; CGP-NEXT:  ; %bb.1:
+; CGP-NEXT:    v_ashrrev_i32_e32 v0, 31, v11
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v0
+; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v11, v0, vcc
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v0
+; CGP-NEXT:    v_xor_b32_e32 v0, v4, v0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v1
+; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v0
+; CGP-NEXT:    v_ashrrev_i32_e32 v11, 31, v7
+; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v6
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v5, v11
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, v7, v11, vcc
+; CGP-NEXT:    v_sub_i32_e32 v13, vcc, 0, v1
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CGP-NEXT:    v_mul_f32_e32 v12, 0x2f800000, v4
+; CGP-NEXT:    v_trunc_f32_e32 v12, v12
+; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v12
+; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT:    v_cvt_u32_f32_e32 v12, v12
+; CGP-NEXT:    v_subb_u32_e32 v14, vcc, 0, v0, vcc
+; CGP-NEXT:    v_xor_b32_e32 v6, v6, v11
+; CGP-NEXT:    v_mul_lo_u32 v15, v14, v4
+; CGP-NEXT:    v_mul_lo_u32 v16, v13, v12
+; CGP-NEXT:    v_mul_hi_u32 v18, v13, v4
+; CGP-NEXT:    v_mul_lo_u32 v17, v13, v4
+; CGP-NEXT:    v_xor_b32_e32 v7, v7, v11
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v15, v18
+; CGP-NEXT:    v_mul_lo_u32 v16, v12, v17
+; CGP-NEXT:    v_mul_lo_u32 v18, v4, v15
+; CGP-NEXT:    v_mul_hi_u32 v19, v4, v17
+; CGP-NEXT:    v_mul_hi_u32 v17, v12, v17
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v16, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v16, v19
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v19, v12, v15
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v18, v16
+; CGP-NEXT:    v_mul_hi_u32 v18, v4, v15
+; CGP-NEXT:    v_mul_hi_u32 v15, v12, v15
+; CGP-NEXT:    v_add_i32_e32 v17, vcc, v19, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v17, vcc, v17, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v18, vcc, v19, v18
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v17, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v17, vcc, v18, v17
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v15, v17
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v16
+; CGP-NEXT:    v_addc_u32_e64 v16, s[4:5], v12, v15, vcc
+; CGP-NEXT:    v_mul_lo_u32 v14, v14, v4
+; CGP-NEXT:    v_mul_lo_u32 v17, v13, v16
+; CGP-NEXT:    v_mul_lo_u32 v18, v13, v4
+; CGP-NEXT:    v_mul_hi_u32 v13, v13, v4
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v15
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v17
+; CGP-NEXT:    v_mul_hi_u32 v15, v4, v18
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; CGP-NEXT:    v_mul_lo_u32 v14, v16, v18
+; CGP-NEXT:    v_mul_lo_u32 v17, v4, v13
+; CGP-NEXT:    v_mul_hi_u32 v18, v16, v18
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v15, v16, v13
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v17, v14
+; CGP-NEXT:    v_mul_hi_u32 v17, v4, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v16, v13
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v17, s[4:5], v18, v17
+; CGP-NEXT:    v_add_i32_e64 v14, s[4:5], v15, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v17, v15
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, v12, v13, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
+; CGP-NEXT:    v_addc_u32_e32 v12, vcc, 0, v12, vcc
+; CGP-NEXT:    v_mul_lo_u32 v13, v7, v4
+; CGP-NEXT:    v_mul_lo_u32 v14, v6, v12
+; CGP-NEXT:    v_mul_hi_u32 v15, v6, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v15, v7, v12
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
+; CGP-NEXT:    v_mul_hi_u32 v14, v6, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v7, v12
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; CGP-NEXT:    v_mul_lo_u32 v13, v0, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, v1, v12
+; CGP-NEXT:    v_mul_lo_u32 v14, v1, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v6, v14
+; CGP-NEXT:    v_subb_u32_e64 v12, s[4:5], v7, v4, vcc
+; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v7, v4
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v12, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v1
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v12, v0
+; CGP-NEXT:    v_subb_u32_e32 v4, vcc, v4, v0, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[4:5]
+; CGP-NEXT:    v_sub_i32_e32 v13, vcc, v6, v1
+; CGP-NEXT:    v_subbrev_u32_e64 v14, s[4:5], 0, v4, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v14, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v13, v1
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v14, v0
+; CGP-NEXT:    v_subb_u32_e32 v0, vcc, v4, v0, vcc
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v13, v1
+; CGP-NEXT:    v_cndmask_b32_e64 v15, v15, v16, s[4:5]
+; CGP-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v15
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v13, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v14, v0, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v12, v0, vcc
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v11
+; CGP-NEXT:    v_xor_b32_e32 v4, v0, v11
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v1, v11
+; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v4, v11, vcc
+; CGP-NEXT:  BB8_2: ; %Flow2
+; CGP-NEXT:    s_or_saveexec_b64 s[8:9], s[6:7]
+; CGP-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; CGP-NEXT:    s_cbranch_execz BB8_4
+; CGP-NEXT:  ; %bb.3:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v10
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
+; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_lo_u32 v1, v0, v10
+; CGP-NEXT:    v_mul_hi_u32 v4, v0, v10
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, 0, v1
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; CGP-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v0, v1
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; CGP-NEXT:    v_mul_hi_u32 v0, v0, v5
+; CGP-NEXT:    v_mul_lo_u32 v0, v0, v10
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v5, v0
+; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v1, v10
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v0
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v10
+; CGP-NEXT:    v_sub_i32_e64 v0, s[6:7], v1, v10
+; CGP-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v4, v0, s[4:5]
+; CGP-NEXT:    v_mov_b32_e32 v1, 0
+; CGP-NEXT:  BB8_4:
+; CGP-NEXT:    s_or_b64 exec, exec, s[8:9]
+; CGP-NEXT:    v_or_b32_e32 v5, v3, v9
+; CGP-NEXT:    v_mov_b32_e32 v4, 0
+; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execz BB8_6
+; CGP-NEXT:  ; %bb.5:
+; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v9
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v4
+; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v9, v4, vcc
+; CGP-NEXT:    v_xor_b32_e32 v5, v5, v4
+; CGP-NEXT:    v_xor_b32_e32 v4, v6, v4
+; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v5
+; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v4
+; CGP-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
+; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v7
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v2, v9
+; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
+; CGP-NEXT:    v_sub_i32_e32 v11, vcc, 0, v5
+; CGP-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v6
+; CGP-NEXT:    v_mul_f32_e32 v10, 0x2f800000, v6
+; CGP-NEXT:    v_trunc_f32_e32 v10, v10
+; CGP-NEXT:    v_mac_f32_e32 v6, 0xcf800000, v10
+; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT:    v_cvt_u32_f32_e32 v10, v10
+; CGP-NEXT:    v_subb_u32_e32 v12, vcc, 0, v4, vcc
+; CGP-NEXT:    v_xor_b32_e32 v7, v7, v9
+; CGP-NEXT:    v_mul_lo_u32 v13, v12, v6
+; CGP-NEXT:    v_mul_lo_u32 v14, v11, v10
+; CGP-NEXT:    v_mul_hi_u32 v16, v11, v6
+; CGP-NEXT:    v_mul_lo_u32 v15, v11, v6
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v9
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
+; CGP-NEXT:    v_mul_lo_u32 v14, v10, v15
+; CGP-NEXT:    v_mul_lo_u32 v16, v6, v13
+; CGP-NEXT:    v_mul_hi_u32 v17, v6, v15
+; CGP-NEXT:    v_mul_hi_u32 v15, v10, v15
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v17, v10, v13
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v16, v14
+; CGP-NEXT:    v_mul_hi_u32 v16, v6, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v10, v13
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v17, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v16, vcc, v17, v16
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v14
+; CGP-NEXT:    v_addc_u32_e64 v14, s[4:5], v10, v13, vcc
+; CGP-NEXT:    v_mul_lo_u32 v12, v12, v6
+; CGP-NEXT:    v_mul_lo_u32 v15, v11, v14
+; CGP-NEXT:    v_mul_lo_u32 v16, v11, v6
+; CGP-NEXT:    v_mul_hi_u32 v11, v11, v6
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v15
+; CGP-NEXT:    v_mul_hi_u32 v13, v6, v16
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; CGP-NEXT:    v_mul_lo_u32 v12, v14, v16
+; CGP-NEXT:    v_mul_lo_u32 v15, v6, v11
+; CGP-NEXT:    v_mul_hi_u32 v16, v14, v16
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_mul_lo_u32 v13, v14, v11
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v15, v12
+; CGP-NEXT:    v_mul_hi_u32 v15, v6, v11
+; CGP-NEXT:    v_mul_hi_u32 v11, v14, v11
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v15, s[4:5], v16, v15
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v15, v13
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; CGP-NEXT:    v_addc_u32_e32 v10, vcc, v10, v11, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v12
+; CGP-NEXT:    v_addc_u32_e32 v10, vcc, 0, v10, vcc
+; CGP-NEXT:    v_mul_lo_u32 v11, v3, v6
+; CGP-NEXT:    v_mul_lo_u32 v12, v7, v10
+; CGP-NEXT:    v_mul_hi_u32 v13, v7, v6
+; CGP-NEXT:    v_mul_hi_u32 v6, v3, v6
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v13, v3, v10
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_mul_hi_u32 v12, v7, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v3, v10
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v13, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CGP-NEXT:    v_mul_lo_u32 v11, v4, v6
+; CGP-NEXT:    v_mul_lo_u32 v10, v5, v10
+; CGP-NEXT:    v_mul_lo_u32 v12, v5, v6
+; CGP-NEXT:    v_mul_hi_u32 v6, v5, v6
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v7, v12
+; CGP-NEXT:    v_subb_u32_e64 v10, s[4:5], v3, v6, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v6
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v10, v4
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v6, v6, v11, s[4:5]
+; CGP-NEXT:    v_sub_i32_e32 v11, vcc, v7, v5
+; CGP-NEXT:    v_subbrev_u32_e64 v12, s[4:5], 0, v3, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v12, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v5
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v12, v4
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v11, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[4:5]
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v11, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v12, v3, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v10, v3, vcc
+; CGP-NEXT:    v_xor_b32_e32 v4, v4, v9
+; CGP-NEXT:    v_xor_b32_e32 v3, v3, v9
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v4, v9
+; CGP-NEXT:    v_subb_u32_e32 v5, vcc, v3, v9, vcc
+; CGP-NEXT:  BB8_6: ; %Flow
+; CGP-NEXT:    s_or_saveexec_b64 s[8:9], s[6:7]
+; CGP-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; CGP-NEXT:    s_cbranch_execz BB8_8
+; CGP-NEXT:  ; %bb.7:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v8
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_lo_u32 v4, v3, v8
+; CGP-NEXT:    v_mul_hi_u32 v5, v3, v8
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; CGP-NEXT:    v_mul_hi_u32 v4, v4, v3
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CGP-NEXT:    v_mul_hi_u32 v3, v3, v2
+; CGP-NEXT:    v_mul_lo_u32 v3, v3, v8
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v2, v3
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v4, v8
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v3
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v8
+; CGP-NEXT:    v_sub_i32_e64 v2, s[6:7], v4, v8
+; CGP-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v4, v5, v2, s[4:5]
+; CGP-NEXT:    v_mov_b32_e32 v5, 0
+; CGP-NEXT:  BB8_8:
+; CGP-NEXT:    s_or_b64 exec, exec, s[8:9]
+; CGP-NEXT:    v_mov_b32_e32 v2, v4
+; CGP-NEXT:    v_mov_b32_e32 v3, v5
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
+  %r = srem <2 x i64> %x, %shl.y
+  ret <2 x i64> %r
+}
+
+define i64 @v_srem_i64_24bit(i64 %num, i64 %den) {
+; GISEL-LABEL: v_srem_i64_24bit:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
+; GISEL-NEXT:    v_and_b32_e32 v1, s4, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
+; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v2
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GISEL-NEXT:    v_mul_lo_u32 v3, v2, v1
+; GISEL-NEXT:    v_mul_hi_u32 v4, v2, v1
+; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v3
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v3, v3, v2
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v2, v3
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v2, v2, v0
+; GISEL-NEXT:    v_mul_lo_u32 v2, v2, v1
+; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v2
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v3, v1
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v1
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[6:7], v3, v1
+; GISEL-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v4, v0, s[4:5]
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_srem_i64_24bit:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_mov_b32 s4, 0xffffff
+; CGP-NEXT:    v_and_b32_e32 v1, s4, v2
+; CGP-NEXT:    v_cvt_f32_i32_e32 v2, v1
+; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
+; CGP-NEXT:    v_cvt_f32_i32_e32 v3, v0
+; CGP-NEXT:    v_xor_b32_e32 v5, v0, v1
+; CGP-NEXT:    v_rcp_f32_e32 v4, v2
+; CGP-NEXT:    v_ashrrev_i32_e32 v5, 30, v5
+; CGP-NEXT:    v_or_b32_e32 v5, 1, v5
+; CGP-NEXT:    v_mul_f32_e32 v4, v3, v4
+; CGP-NEXT:    v_trunc_f32_e32 v4, v4
+; CGP-NEXT:    v_mad_f32 v3, -v4, v2, v3
+; CGP-NEXT:    v_cvt_i32_f32_e32 v4, v4
+; CGP-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, |v2|
+; CGP-NEXT:    v_cndmask_b32_e32 v2, 0, v5, vcc
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
+; CGP-NEXT:    v_mul_lo_u32 v1, v2, v1
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
+; CGP-NEXT:    v_lshlrev_b32_e32 v0, 7, v0
+; CGP-NEXT:    v_ashrrev_i32_e32 v0, 7, v0
+; CGP-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %num.mask = and i64 %num, 16777215
+  %den.mask = and i64 %den, 16777215
+  %result = srem i64 %num.mask, %den.mask
+  ret i64 %result
+}
+
+define <2 x i64> @v_srem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
+; GISEL-LABEL: v_srem_v2i64_24bit:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s6, 0xffffff
+; GISEL-NEXT:    v_and_b32_e32 v1, s6, v4
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 0, v1
+; GISEL-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v1
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v3
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v1
+; GISEL-NEXT:    v_subb_u32_e32 v8, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_and_b32_e32 v0, s6, v0
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_and_b32_e32 v6, s6, v6
+; GISEL-NEXT:    v_and_b32_e32 v2, s6, v2
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_mul_lo_u32 v9, v8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v10, v7, v5
+; GISEL-NEXT:    v_mul_hi_u32 v12, v7, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, v7, v4
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v11
+; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v11
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, 0, v0
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v14, v5, v9
+; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v11
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; GISEL-NEXT:    v_mul_hi_u32 v12, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v14, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v5, v9, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, v8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, v7, v10
+; GISEL-NEXT:    v_mul_lo_u32 v12, v7, v4
+; GISEL-NEXT:    v_mul_hi_u32 v7, v7, v4
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v9
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; GISEL-NEXT:    v_mul_hi_u32 v9, v4, v12
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
+; GISEL-NEXT:    v_mul_lo_u32 v8, v10, v12
+; GISEL-NEXT:    v_mul_lo_u32 v11, v4, v7
+; GISEL-NEXT:    v_mul_hi_u32 v12, v10, v12
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v7
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; GISEL-NEXT:    v_mul_hi_u32 v11, v4, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v10, v7
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v9
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v7, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, v13, v4
+; GISEL-NEXT:    v_mul_lo_u32 v8, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v9, v0, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v13, v4
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v13, v5
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; GISEL-NEXT:    v_mul_hi_u32 v8, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v13, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT:    v_mul_lo_u32 v7, v3, v4
+; GISEL-NEXT:    v_mul_lo_u32 v5, v1, v5
+; GISEL-NEXT:    v_mul_lo_u32 v8, v1, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], v13, v4, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v4, s[4:5], v13, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v3
+; GISEL-NEXT:    v_subb_u32_e32 v4, vcc, v4, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v0, v1
+; GISEL-NEXT:    v_subbrev_u32_e64 v9, s[4:5], 0, v4, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v3
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v4, v3, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v8, v1
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, v10, v11, s[4:5]
+; GISEL-NEXT:    v_addc_u32_e64 v6, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v11, v4
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v12, v6
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
+; GISEL-NEXT:    v_mac_f32_e32 v11, 0x4f800000, v12
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v11
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v3, vcc
+; GISEL-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v8
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v3
+; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
+; GISEL-NEXT:    v_mac_f32_e32 v3, 0xcf800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v4
+; GISEL-NEXT:    v_subb_u32_e32 v8, vcc, 0, v6, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v8, v3
+; GISEL-NEXT:    v_mul_lo_u32 v10, v7, v5
+; GISEL-NEXT:    v_mul_hi_u32 v12, v7, v3
+; GISEL-NEXT:    v_mul_lo_u32 v11, v7, v3
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v11
+; GISEL-NEXT:    v_mul_lo_u32 v12, v3, v9
+; GISEL-NEXT:    v_mul_hi_u32 v14, v3, v11
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, 0, v2
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v14, v5, v9
+; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v11
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; GISEL-NEXT:    v_mul_hi_u32 v12, v3, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v14, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v10
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v5, v9, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, v8, v3
+; GISEL-NEXT:    v_mul_lo_u32 v11, v7, v10
+; GISEL-NEXT:    v_mul_lo_u32 v12, v7, v3
+; GISEL-NEXT:    v_mul_hi_u32 v7, v7, v3
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v9
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; GISEL-NEXT:    v_mul_hi_u32 v9, v3, v12
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
+; GISEL-NEXT:    v_mul_lo_u32 v8, v10, v12
+; GISEL-NEXT:    v_mul_lo_u32 v11, v3, v7
+; GISEL-NEXT:    v_mul_hi_u32 v12, v10, v12
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v7
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v8
+; GISEL-NEXT:    v_mul_hi_u32 v11, v3, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v10, v7
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
+; GISEL-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v9
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v7, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, v13, v3
+; GISEL-NEXT:    v_mul_lo_u32 v8, v2, v5
+; GISEL-NEXT:    v_mul_hi_u32 v9, v2, v3
+; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, 0, v0
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v13, v5
+; GISEL-NEXT:    v_mul_hi_u32 v3, v13, v3
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; GISEL-NEXT:    v_mul_hi_u32 v8, v2, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v13, v5
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v9, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT:    v_mul_lo_u32 v7, v6, v3
+; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v5
+; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v3
+; GISEL-NEXT:    v_mul_hi_u32 v3, v4, v3
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
+; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], v13, v3, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v13, v3
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v6
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v2, v4
+; GISEL-NEXT:    v_subbrev_u32_e64 v9, s[4:5], 0, v3, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v4
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v6, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v8, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, v10, v11, s[4:5]
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, 0, v2
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_srem_v2i64_24bit:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_mov_b32 s4, 0xffffff
+; CGP-NEXT:    v_and_b32_e32 v1, s4, v4
+; CGP-NEXT:    v_cvt_f32_i32_e32 v3, v1
+; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
+; CGP-NEXT:    v_cvt_f32_i32_e32 v4, v0
+; CGP-NEXT:    v_xor_b32_e32 v7, v0, v1
+; CGP-NEXT:    v_rcp_f32_e32 v5, v3
+; CGP-NEXT:    v_ashrrev_i32_e32 v7, 30, v7
+; CGP-NEXT:    v_or_b32_e32 v7, 1, v7
+; CGP-NEXT:    v_and_b32_e32 v2, s4, v2
+; CGP-NEXT:    v_mul_f32_e32 v5, v4, v5
+; CGP-NEXT:    v_trunc_f32_e32 v5, v5
+; CGP-NEXT:    v_mad_f32 v4, -v5, v3, v4
+; CGP-NEXT:    v_cvt_i32_f32_e32 v5, v5
+; CGP-NEXT:    v_cmp_ge_f32_e64 vcc, |v4|, |v3|
+; CGP-NEXT:    v_cndmask_b32_e32 v3, 0, v7, vcc
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
+; CGP-NEXT:    v_mul_lo_u32 v1, v3, v1
+; CGP-NEXT:    v_and_b32_e32 v3, s4, v6
+; CGP-NEXT:    v_cvt_f32_i32_e32 v4, v3
+; CGP-NEXT:    v_xor_b32_e32 v6, v2, v3
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
+; CGP-NEXT:    v_cvt_f32_i32_e32 v1, v2
+; CGP-NEXT:    v_rcp_f32_e32 v5, v4
+; CGP-NEXT:    v_ashrrev_i32_e32 v6, 30, v6
+; CGP-NEXT:    v_or_b32_e32 v6, 1, v6
+; CGP-NEXT:    v_lshlrev_b32_e32 v0, 7, v0
+; CGP-NEXT:    v_mul_f32_e32 v5, v1, v5
+; CGP-NEXT:    v_trunc_f32_e32 v5, v5
+; CGP-NEXT:    v_mad_f32 v1, -v5, v4, v1
+; CGP-NEXT:    v_cvt_i32_f32_e32 v5, v5
+; CGP-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v4|
+; CGP-NEXT:    v_cndmask_b32_e32 v1, 0, v6, vcc
+; CGP-NEXT:    v_ashrrev_i32_e32 v0, 7, v0
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v5, v1
+; CGP-NEXT:    v_mul_lo_u32 v3, v1, v3
+; CGP-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
+; CGP-NEXT:    v_lshlrev_b32_e32 v2, 7, v2
+; CGP-NEXT:    v_ashrrev_i32_e32 v2, 7, v2
+; CGP-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>
+  %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215>
+  %result = srem <2 x i64> %num.mask, %den.mask
+  ret <2 x i64> %result
+}


        


More information about the llvm-commits mailing list