[llvm] 90a36bb - AMDGPU/GlobalISel: Legalize 64-bit G_UDIV/G_UREM

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 30 07:57:49 PDT 2020


Author: Matt Arsenault
Date: 2020-03-30T10:57:37-04:00
New Revision: 90a36bbd7c2cfae76ffe9a914bed32c45715f349

URL: https://github.com/llvm/llvm-project/commit/90a36bbd7c2cfae76ffe9a914bed32c45715f349
DIFF: https://github.com/llvm/llvm-project/commit/90a36bbd7c2cfae76ffe9a914bed32c45715f349.diff

LOG: AMDGPU/GlobalISel: Legalize 64-bit G_UDIV/G_UREM

Mostly ported from the DAG version. This results in much worse code
than the DAG version, largely due to a much worse expansion for
G_UMULH.

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 0f67af9ceeca..210aa9eaeff0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2529,11 +2529,176 @@ bool AMDGPULegalizerInfo::legalizeUDIV_UREM32(MachineInstr &MI,
   return true;
 }
 
+// Build integer reciprocal sequence around V_RCP_IFLAG_F32
+//
+// Return lo, hi of result
+//
+// %cvt.lo = G_UITOFP Val.lo
+// %cvt.hi = G_UITOFP Val.hi
+// %mad = G_FMAD %cvt.hi, 2**32, %cvt.lo
+// %rcp = G_AMDGPU_RCP_IFLAG %mad
+// %mul1 = G_FMUL %rcp, 0x5f7ffffc
+// %mul2 = G_FMUL %mul1, 2**(-32)
+// %trunc = G_INTRINSIC_TRUNC %mul2
+// %mad2 = G_FMAD %trunc, -(2**32), %mul1
+// return {G_FPTOUI %mad2, G_FPTOUI %trunc}
+static std::pair<Register, Register> emitReciprocalU64(MachineIRBuilder &B,
+                                                       Register Val) {
+  const LLT S32 = LLT::scalar(32);
+  auto Unmerge = B.buildUnmerge(S32, Val);
+
+  auto CvtLo = B.buildUITOFP(S32, Unmerge.getReg(0));
+  auto CvtHi = B.buildUITOFP(S32, Unmerge.getReg(1));
+
+  auto Mad = B.buildFMAD(S32, CvtHi, // 2**32
+                         B.buildFConstant(S32, BitsToFloat(0x4f800000)), CvtLo);
+
+  auto Rcp = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {S32}, {Mad});
+  auto Mul1 =
+      B.buildFMul(S32, Rcp, B.buildFConstant(S32, BitsToFloat(0x5f7ffffc)));
+
+  // 2**(-32)
+  auto Mul2 =
+      B.buildFMul(S32, Mul1, B.buildFConstant(S32, BitsToFloat(0x2f800000)));
+  auto Trunc = B.buildIntrinsicTrunc(S32, Mul2);
+
+  // -(2**32)
+  auto Mad2 = B.buildFMAD(S32, Trunc,
+                          B.buildFConstant(S32, BitsToFloat(0xcf800000)), Mul1);
+
+  auto ResultLo = B.buildFPTOUI(S32, Mad2);
+  auto ResultHi = B.buildFPTOUI(S32, Trunc);
+
+  return {ResultLo.getReg(0), ResultHi.getReg(0)};
+}
+
+bool AMDGPULegalizerInfo::legalizeUDIV_UREM64(MachineInstr &MI,
+                                              MachineRegisterInfo &MRI,
+                                              MachineIRBuilder &B) const {
+  B.setInstr(MI);
+
+  const bool IsDiv = MI.getOpcode() == TargetOpcode::G_UDIV;
+  const LLT S32 = LLT::scalar(32);
+  const LLT S64 = LLT::scalar(64);
+  const LLT S1 = LLT::scalar(1);
+  Register Numer = MI.getOperand(1).getReg();
+  Register Denom = MI.getOperand(2).getReg();
+  Register RcpLo, RcpHi;
+
+  std::tie(RcpLo, RcpHi) = emitReciprocalU64(B, Denom);
+
+  auto Rcp = B.buildMerge(S64, {RcpLo, RcpHi});
+
+  auto Zero64 = B.buildConstant(S64, 0);
+  auto NegDenom = B.buildSub(S64, Zero64, Denom);
+
+  auto MulLo1 = B.buildMul(S64, NegDenom, Rcp);
+  auto MulHi1 = B.buildUMulH(S64, Rcp, MulLo1);
+
+  auto UnmergeMulHi1 = B.buildUnmerge(S32, MulHi1);
+  Register MulHi1_Lo = UnmergeMulHi1.getReg(0);
+  Register MulHi1_Hi = UnmergeMulHi1.getReg(1);
+
+  auto Add1_Lo = B.buildUAddo(S32, S1, RcpLo, MulHi1_Lo);
+  auto Add1_Hi = B.buildUAdde(S32, S1, RcpHi, MulHi1_Hi, Add1_Lo.getReg(1));
+  auto Add1_HiNc = B.buildAdd(S32, RcpHi, MulHi1_Hi);
+  auto Add1 = B.buildMerge(S64, {Add1_Lo, Add1_Hi});
+
+  auto MulLo2 = B.buildMul(S64, NegDenom, Add1);
+  auto MulHi2 = B.buildUMulH(S64, Add1, MulLo2);
+  auto UnmergeMulHi2 = B.buildUnmerge(S32, MulHi2);
+  Register MulHi2_Lo = UnmergeMulHi2.getReg(0);
+  Register MulHi2_Hi = UnmergeMulHi2.getReg(1);
+
+  auto Zero32 = B.buildConstant(S32, 0);
+  auto Add2_Lo = B.buildUAddo(S32, S1, Add1_Lo, MulHi2_Lo);
+  auto Add2_HiC =
+      B.buildUAdde(S32, S1, Add1_HiNc, MulHi2_Hi, Add1_Lo.getReg(1));
+  auto Add2_Hi = B.buildUAdde(S32, S1, Add2_HiC, Zero32, Add2_Lo.getReg(1));
+  auto Add2 = B.buildMerge(S64, {Add2_Lo, Add2_Hi});
+
+  auto UnmergeNumer = B.buildUnmerge(S32, Numer);
+  Register NumerLo = UnmergeNumer.getReg(0);
+  Register NumerHi = UnmergeNumer.getReg(1);
+
+  auto MulHi3 = B.buildUMulH(S64, Numer, Add2);
+  auto Mul3 = B.buildMul(S64, Denom, MulHi3);
+  auto UnmergeMul3 = B.buildUnmerge(S32, Mul3);
+  Register Mul3_Lo = UnmergeMul3.getReg(0);
+  Register Mul3_Hi = UnmergeMul3.getReg(1);
+  auto Sub1_Lo = B.buildUSubo(S32, S1, NumerLo, Mul3_Lo);
+  auto Sub1_Hi = B.buildUSube(S32, S1, NumerHi, Mul3_Hi, Sub1_Lo.getReg(1));
+  auto Sub1_Mi = B.buildSub(S32, NumerHi, Mul3_Hi);
+  auto Sub1 = B.buildMerge(S64, {Sub1_Lo, Sub1_Hi});
+
+  auto UnmergeDenom = B.buildUnmerge(S32, Denom);
+  Register DenomLo = UnmergeDenom.getReg(0);
+  Register DenomHi = UnmergeDenom.getReg(1);
+
+  auto CmpHi = B.buildICmp(CmpInst::ICMP_UGE, S1, Sub1_Hi, DenomHi);
+  auto C1 = B.buildSExt(S32, CmpHi);
+
+  auto CmpLo = B.buildICmp(CmpInst::ICMP_UGE, S1, Sub1_Lo, DenomLo);
+  auto C2 = B.buildSExt(S32, CmpLo);
+
+  auto CmpEq = B.buildICmp(CmpInst::ICMP_EQ, S1, Sub1_Hi, DenomHi);
+  auto C3 = B.buildSelect(S32, CmpEq, C2, C1);
+
+  // TODO: Here and below portions of the code can be enclosed into if/endif.
+  // Currently control flow is unconditional and we have 4 selects after
+  // potential endif to substitute PHIs.
+
+  // if C3 != 0 ...
+  auto Sub2_Lo = B.buildUSubo(S32, S1, Sub1_Lo, DenomLo);
+  auto Sub2_Mi = B.buildUSube(S32, S1, Sub1_Mi, DenomHi, Sub1_Lo.getReg(1));
+  auto Sub2_Hi = B.buildUSube(S32, S1, Sub2_Mi, Zero32, Sub2_Lo.getReg(1));
+  auto Sub2 = B.buildMerge(S64, {Sub2_Lo, Sub2_Hi});
+
+  auto One64 = B.buildConstant(S64, 1);
+  auto Add3 = B.buildAdd(S64, MulHi3, One64);
+
+  auto C4 =
+      B.buildSExt(S32, B.buildICmp(CmpInst::ICMP_UGE, S1, Sub2_Hi, DenomHi));
+  auto C5 =
+      B.buildSExt(S32, B.buildICmp(CmpInst::ICMP_UGE, S1, Sub2_Lo, DenomLo));
+  auto C6 = B.buildSelect(
+      S32, B.buildICmp(CmpInst::ICMP_EQ, S1, Sub2_Hi, DenomHi), C5, C4);
+
+  // if (C6 != 0)
+  auto Add4 = B.buildAdd(S64, Add3, One64);
+  auto Sub3_Lo = B.buildUSubo(S32, S1, Sub2_Lo, DenomLo);
+
+  auto Sub3_Mi = B.buildUSube(S32, S1, Sub2_Mi, DenomHi, Sub2_Lo.getReg(1));
+  auto Sub3_Hi = B.buildUSube(S32, S1, Sub3_Mi, Zero32, Sub3_Lo.getReg(1));
+  auto Sub3 = B.buildMerge(S64, {Sub3_Lo, Sub3_Hi});
+
+  // endif C6
+  // endif C3
+
+  if (IsDiv) {
+    auto Sel1 = B.buildSelect(
+        S64, B.buildICmp(CmpInst::ICMP_NE, S1, C6, Zero32), Add4, Add3);
+    B.buildSelect(MI.getOperand(0),
+                  B.buildICmp(CmpInst::ICMP_NE, S1, C3, Zero32), Sel1, MulHi3);
+  } else {
+    auto Sel2 = B.buildSelect(
+        S64, B.buildICmp(CmpInst::ICMP_NE, S1, C6, Zero32), Sub3, Sub2);
+    B.buildSelect(MI.getOperand(0),
+                  B.buildICmp(CmpInst::ICMP_NE, S1, C3, Zero32), Sel2, Sub1);
+  }
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AMDGPULegalizerInfo::legalizeUDIV_UREM(MachineInstr &MI,
                                             MachineRegisterInfo &MRI,
                                             MachineIRBuilder &B) const {
-  if (MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32))
+  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+  if (Ty == LLT::scalar(32))
     return legalizeUDIV_UREM32(MI, MRI, B);
+  if (Ty == LLT::scalar(64))
+    return legalizeUDIV_UREM64(MI, MRI, B);
   return false;
 }
 

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 270195f8eddd..48012d7a1013 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -111,9 +111,11 @@ class AMDGPULegalizerInfo : public LegalizerInfo {
                                bool IsRem) const;
   bool legalizeUDIV_UREM32(MachineInstr &MI, MachineRegisterInfo &MRI,
                            MachineIRBuilder &B) const;
-
   bool legalizeSDIV_SREM32(MachineInstr &MI, MachineRegisterInfo &MRI,
                            MachineIRBuilder &B) const;
+
+  bool legalizeUDIV_UREM64(MachineInstr &MI, MachineRegisterInfo &MRI,
+                           MachineIRBuilder &B) const;
   bool legalizeSDIV_SREM(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &B) const;
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir
index 5a0d3879e55b..6230a3728273 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir
@@ -1,7 +1,7 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer  -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer  -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
 
 ---
 name: test_udiv_s32
@@ -297,18 +297,462 @@ body: |
     ; GFX6-LABEL: name: test_udiv_s64
     ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX6: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[COPY]], [[COPY1]]
-    ; GFX6: $vgpr0_vgpr1 = COPY [[UDIV]](s64)
+    ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
+    ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %8(s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]]
+    ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %16(s32)
+    ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
+    ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]]
+    ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]]
+    ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]]
+    ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]]
+    ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]]
+    ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]]
+    ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]]
+    ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
+    ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]]
+    ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
+    ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]]
+    ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
+    ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]]
+    ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]]
+    ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]]
+    ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD16]](s32)
+    ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
+    ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
+    ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]]
+    ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
+    ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
+    ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]]
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]]
+    ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
+    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]]
+    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]]
+    ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]]
+    ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
+    ; GFX6: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]]
+    ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV15]], [[UADDO35]]
+    ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE6]](s32)
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]]
+    ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]]
+    ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]]
+    ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[UV17]], [[UADDO37]]
+    ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE8]](s32)
+    ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]]
+    ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]]
+    ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]]
+    ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
+    ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+    ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
+    ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+    ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
     ; GFX8-LABEL: name: test_udiv_s64
     ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX8: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[COPY]], [[COPY1]]
-    ; GFX8: $vgpr0_vgpr1 = COPY [[UDIV]](s64)
+    ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
+    ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %8(s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]]
+    ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %16(s32)
+    ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
+    ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]]
+    ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]]
+    ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]]
+    ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]]
+    ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]]
+    ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]]
+    ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]]
+    ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
+    ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]]
+    ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
+    ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]]
+    ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
+    ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]]
+    ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]]
+    ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]]
+    ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD16]](s32)
+    ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
+    ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
+    ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]]
+    ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
+    ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
+    ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]]
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]]
+    ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
+    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]]
+    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]]
+    ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]]
+    ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
+    ; GFX8: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX8: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]]
+    ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV15]], [[UADDO35]]
+    ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE6]](s32)
+    ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]]
+    ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]]
+    ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]]
+    ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[UV17]], [[UADDO37]]
+    ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE8]](s32)
+    ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]]
+    ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]]
+    ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]]
+    ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
+    ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+    ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
+    ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+    ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
     ; GFX9-LABEL: name: test_udiv_s64
     ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[COPY]], [[COPY1]]
-    ; GFX9: $vgpr0_vgpr1 = COPY [[UDIV]](s64)
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
+    ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %8(s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]]
+    ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %16(s32)
+    ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
+    ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]]
+    ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]]
+    ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]]
+    ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]]
+    ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]]
+    ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]]
+    ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]]
+    ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
+    ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]]
+    ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
+    ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]]
+    ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
+    ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]]
+    ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]]
+    ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]]
+    ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD16]](s32)
+    ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
+    ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
+    ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]]
+    ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
+    ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
+    ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]]
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]]
+    ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
+    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]]
+    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]]
+    ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]]
+    ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
+    ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX9: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]]
+    ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV15]], [[UADDO35]]
+    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE6]](s32)
+    ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]]
+    ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]]
+    ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]]
+    ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[UV17]], [[UADDO37]]
+    ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE8]](s32)
+    ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]]
+    ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]]
+    ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]]
+    ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
+    ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+    ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
+    ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+    ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = G_UDIV %0, %1
@@ -326,28 +770,907 @@ body: |
     ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
     ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; GFX6: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[UV]], [[UV2]]
-    ; GFX6: [[UDIV1:%[0-9]+]]:_(s64) = G_UDIV [[UV1]], [[UV3]]
-    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UDIV]](s64), [[UDIV1]](s64)
+    ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32)
+    ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %250(s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]]
+    ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %258(s32)
+    ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]]
+    ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]]
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]]
+    ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]]
+    ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]]
+    ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]]
+    ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]]
+    ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]]
+    ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]]
+    ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]]
+    ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE4]]
+    ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]]
+    ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE4]]
+    ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]]
+    ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE4]]
+    ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]]
+    ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE4]]
+    ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD16]](s32)
+    ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]]
+    ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]]
+    ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD16]]
+    ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]]
+    ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]]
+    ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD18]], [[USUBO3]]
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD18]]
+    ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]]
+    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]]
+    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]]
+    ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]]
+    ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
+    ; GFX6: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV18]]
+    ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV19]], [[UADDO35]]
+    ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE6]](s32)
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]]
+    ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]]
+    ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX6: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV20]]
+    ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[UV21]], [[UADDO37]]
+    ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE8]](s32)
+    ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV16]]
+    ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]]
+    ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]]
+    ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX6: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX6: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV22]](s32)
+    ; GFX6: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV23]](s32)
+    ; GFX6: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+    ; GFX6: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C8]]
+    ; GFX6: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FMUL2]], [[C9]]
+    ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL3]]
+    ; GFX6: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX6: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+    ; GFX6: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
+    ; GFX6: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX6: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX6: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[UV26]]
+    ; GFX6: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[UV27]], [[USUBO9]]
+    ; GFX6: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]]
+    ; GFX6: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]]
+    ; GFX6: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]]
+    ; GFX6: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]]
+    ; GFX6: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX6: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]]
+    ; GFX6: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
+    ; GFX6: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]]
+    ; GFX6: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
+    ; GFX6: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX6: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1)
+    ; GFX6: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH16]]
+    ; GFX6: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1)
+    ; GFX6: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX6: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]]
+    ; GFX6: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
+    ; GFX6: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]]
+    ; GFX6: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX6: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
+    ; GFX6: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH18]]
+    ; GFX6: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1)
+    ; GFX6: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
+    ; GFX6: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD21]]
+    ; GFX6: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
+    ; GFX6: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]]
+    ; GFX6: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]]
+    ; GFX6: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]]
+    ; GFX6: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]]
+    ; GFX6: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO49]]
+    ; GFX6: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]]
+    ; GFX6: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO48]]
+    ; GFX6: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO48]]
+    ; GFX6: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE10]]
+    ; GFX6: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO48]]
+    ; GFX6: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX6: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]]
+    ; GFX6: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE10]], [[MUL24]]
+    ; GFX6: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD27]]
+    ; GFX6: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[MUL24]]
+    ; GFX6: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX6: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
+    ; GFX6: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH21]]
+    ; GFX6: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
+    ; GFX6: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
+    ; GFX6: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE10]], [[ADD27]]
+    ; GFX6: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE10]], [[MUL24]]
+    ; GFX6: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD27]]
+    ; GFX6: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX6: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
+    ; GFX6: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH23]]
+    ; GFX6: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1)
+    ; GFX6: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
+    ; GFX6: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD28]]
+    ; GFX6: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
+    ; GFX6: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]]
+    ; GFX6: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE10]], [[ADD27]]
+    ; GFX6: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]]
+    ; GFX6: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]]
+    ; GFX6: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO49]]
+    ; GFX6: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[C5]], [[UADDO61]]
+    ; GFX6: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX6: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX6: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDO60]]
+    ; GFX6: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV30]], [[UADDE14]]
+    ; GFX6: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDO60]]
+    ; GFX6: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX6: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
+    ; GFX6: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH25]]
+    ; GFX6: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
+    ; GFX6: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
+    ; GFX6: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDE14]]
+    ; GFX6: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDO60]]
+    ; GFX6: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDE14]]
+    ; GFX6: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX6: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
+    ; GFX6: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH27]]
+    ; GFX6: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1)
+    ; GFX6: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
+    ; GFX6: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD32]]
+    ; GFX6: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
+    ; GFX6: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]]
+    ; GFX6: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDE14]]
+    ; GFX6: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]]
+    ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD35]](s32)
+    ; GFX6: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX6: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[UADDO70]]
+    ; GFX6: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV33]], [[UADDO70]]
+    ; GFX6: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[ADD35]]
+    ; GFX6: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV32]], [[UADDO70]]
+    ; GFX6: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX6: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]]
+    ; GFX6: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[MUL33]]
+    ; GFX6: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[ADD37]], [[USUBO11]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV29]], [[ADD37]]
+    ; GFX6: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX6: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV35]]
+    ; GFX6: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
+    ; GFX6: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV34]]
+    ; GFX6: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1)
+    ; GFX6: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV35]]
+    ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]]
+    ; GFX6: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV34]]
+    ; GFX6: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV35]], [[USUBO11]]
+    ; GFX6: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]]
+    ; GFX6: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX6: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV36]]
+    ; GFX6: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[ADD35]], [[UV37]], [[UADDO73]]
+    ; GFX6: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE16]](s32)
+    ; GFX6: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV35]]
+    ; GFX6: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
+    ; GFX6: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV34]]
+    ; GFX6: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1)
+    ; GFX6: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV35]]
+    ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
+    ; GFX6: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX6: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV38]]
+    ; GFX6: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[UV39]], [[UADDO75]]
+    ; GFX6: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE18]](s32)
+    ; GFX6: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV34]]
+    ; GFX6: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV35]], [[USUBO13]]
+    ; GFX6: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]]
+    ; GFX6: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]]
+    ; GFX6: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]]
+    ; GFX6: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]]
+    ; GFX6: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]]
+    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64)
     ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX6: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C10]]
+    ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[FMUL2]]
+    ; GFX6: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C7]]
+    ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[UITOFP2]]
+    ; GFX6: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+    ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL]]
+    ; GFX6: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+    ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[UITOFP]]
     ; GFX8-LABEL: name: test_udiv_v2s64
     ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
     ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; GFX8: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[UV]], [[UV2]]
-    ; GFX8: [[UDIV1:%[0-9]+]]:_(s64) = G_UDIV [[UV1]], [[UV3]]
-    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UDIV]](s64), [[UDIV1]](s64)
+    ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32)
+    ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32)
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %250(s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]]
+    ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %258(s32)
+    ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]]
+    ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]]
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]]
+    ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]]
+    ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]]
+    ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]]
+    ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]]
+    ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]]
+    ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]]
+    ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]]
+    ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE4]]
+    ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]]
+    ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE4]]
+    ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]]
+    ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE4]]
+    ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]]
+    ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE4]]
+    ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD16]](s32)
+    ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]]
+    ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]]
+    ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD16]]
+    ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]]
+    ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]]
+    ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD18]], [[USUBO3]]
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD18]]
+    ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]]
+    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]]
+    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]]
+    ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]]
+    ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
+    ; GFX8: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX8: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV18]]
+    ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV19]], [[UADDO35]]
+    ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE6]](s32)
+    ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]]
+    ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]]
+    ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX8: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV20]]
+    ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[UV21]], [[UADDO37]]
+    ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE8]](s32)
+    ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV16]]
+    ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]]
+    ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]]
+    ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX8: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX8: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV22]](s32)
+    ; GFX8: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV23]](s32)
+    ; GFX8: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+    ; GFX8: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C8]]
+    ; GFX8: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FMUL2]], [[C9]]
+    ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL3]]
+    ; GFX8: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX8: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+    ; GFX8: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
+    ; GFX8: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX8: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX8: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[UV26]]
+    ; GFX8: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[UV27]], [[USUBO9]]
+    ; GFX8: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]]
+    ; GFX8: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]]
+    ; GFX8: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]]
+    ; GFX8: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]]
+    ; GFX8: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX8: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]]
+    ; GFX8: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
+    ; GFX8: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]]
+    ; GFX8: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
+    ; GFX8: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX8: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1)
+    ; GFX8: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH16]]
+    ; GFX8: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1)
+    ; GFX8: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX8: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]]
+    ; GFX8: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
+    ; GFX8: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]]
+    ; GFX8: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX8: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
+    ; GFX8: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH18]]
+    ; GFX8: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1)
+    ; GFX8: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
+    ; GFX8: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD21]]
+    ; GFX8: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
+    ; GFX8: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]]
+    ; GFX8: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]]
+    ; GFX8: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]]
+    ; GFX8: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]]
+    ; GFX8: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO49]]
+    ; GFX8: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]]
+    ; GFX8: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO48]]
+    ; GFX8: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO48]]
+    ; GFX8: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE10]]
+    ; GFX8: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO48]]
+    ; GFX8: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX8: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]]
+    ; GFX8: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE10]], [[MUL24]]
+    ; GFX8: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD27]]
+    ; GFX8: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[MUL24]]
+    ; GFX8: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX8: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
+    ; GFX8: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH21]]
+    ; GFX8: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
+    ; GFX8: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
+    ; GFX8: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE10]], [[ADD27]]
+    ; GFX8: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE10]], [[MUL24]]
+    ; GFX8: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD27]]
+    ; GFX8: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX8: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
+    ; GFX8: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH23]]
+    ; GFX8: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1)
+    ; GFX8: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
+    ; GFX8: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD28]]
+    ; GFX8: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
+    ; GFX8: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]]
+    ; GFX8: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE10]], [[ADD27]]
+    ; GFX8: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]]
+    ; GFX8: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]]
+    ; GFX8: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO49]]
+    ; GFX8: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[C5]], [[UADDO61]]
+    ; GFX8: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX8: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX8: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDO60]]
+    ; GFX8: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV30]], [[UADDE14]]
+    ; GFX8: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDO60]]
+    ; GFX8: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX8: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
+    ; GFX8: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH25]]
+    ; GFX8: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
+    ; GFX8: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
+    ; GFX8: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDE14]]
+    ; GFX8: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDO60]]
+    ; GFX8: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDE14]]
+    ; GFX8: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX8: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
+    ; GFX8: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH27]]
+    ; GFX8: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1)
+    ; GFX8: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
+    ; GFX8: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD32]]
+    ; GFX8: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
+    ; GFX8: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]]
+    ; GFX8: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDE14]]
+    ; GFX8: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]]
+    ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD35]](s32)
+    ; GFX8: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX8: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[UADDO70]]
+    ; GFX8: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV33]], [[UADDO70]]
+    ; GFX8: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[ADD35]]
+    ; GFX8: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV32]], [[UADDO70]]
+    ; GFX8: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX8: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]]
+    ; GFX8: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[MUL33]]
+    ; GFX8: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[ADD37]], [[USUBO11]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV29]], [[ADD37]]
+    ; GFX8: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX8: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV35]]
+    ; GFX8: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
+    ; GFX8: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV34]]
+    ; GFX8: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1)
+    ; GFX8: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV35]]
+    ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]]
+    ; GFX8: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV34]]
+    ; GFX8: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV35]], [[USUBO11]]
+    ; GFX8: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]]
+    ; GFX8: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX8: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV36]]
+    ; GFX8: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[ADD35]], [[UV37]], [[UADDO73]]
+    ; GFX8: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE16]](s32)
+    ; GFX8: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV35]]
+    ; GFX8: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
+    ; GFX8: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV34]]
+    ; GFX8: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1)
+    ; GFX8: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV35]]
+    ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
+    ; GFX8: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX8: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV38]]
+    ; GFX8: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[UV39]], [[UADDO75]]
+    ; GFX8: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE18]](s32)
+    ; GFX8: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV34]]
+    ; GFX8: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV35]], [[USUBO13]]
+    ; GFX8: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]]
+    ; GFX8: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]]
+    ; GFX8: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]]
+    ; GFX8: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]]
+    ; GFX8: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]]
+    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64)
     ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX8: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C10]]
+    ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[FMUL2]]
+    ; GFX8: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C7]]
+    ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[UITOFP2]]
+    ; GFX8: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+    ; GFX8: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL]]
+    ; GFX8: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+    ; GFX8: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[UITOFP]]
     ; GFX9-LABEL: name: test_udiv_v2s64
     ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
     ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; GFX9: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[UV]], [[UV2]]
-    ; GFX9: [[UDIV1:%[0-9]+]]:_(s64) = G_UDIV [[UV1]], [[UV3]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UDIV]](s64), [[UDIV1]](s64)
+    ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32)
+    ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %250(s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]]
+    ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %258(s32)
+    ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]]
+    ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]]
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]]
+    ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]]
+    ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]]
+    ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]]
+    ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]]
+    ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]]
+    ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]]
+    ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]]
+    ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE4]]
+    ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]]
+    ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE4]]
+    ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]]
+    ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE4]]
+    ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]]
+    ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE4]]
+    ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD16]](s32)
+    ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]]
+    ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]]
+    ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD16]]
+    ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]]
+    ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]]
+    ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD18]], [[USUBO3]]
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD18]]
+    ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]]
+    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]]
+    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]]
+    ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]]
+    ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
+    ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX9: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV18]]
+    ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV19]], [[UADDO35]]
+    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE6]](s32)
+    ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]]
+    ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]]
+    ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX9: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV20]]
+    ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[UV21]], [[UADDO37]]
+    ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE8]](s32)
+    ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV16]]
+    ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]]
+    ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]]
+    ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX9: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX9: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV22]](s32)
+    ; GFX9: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV23]](s32)
+    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+    ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C8]]
+    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FMUL2]], [[C9]]
+    ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL3]]
+    ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX9: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+    ; GFX9: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
+    ; GFX9: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX9: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX9: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[UV26]]
+    ; GFX9: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[UV27]], [[USUBO9]]
+    ; GFX9: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]]
+    ; GFX9: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]]
+    ; GFX9: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]]
+    ; GFX9: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]]
+    ; GFX9: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX9: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]]
+    ; GFX9: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
+    ; GFX9: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]]
+    ; GFX9: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
+    ; GFX9: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX9: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1)
+    ; GFX9: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH16]]
+    ; GFX9: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1)
+    ; GFX9: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX9: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]]
+    ; GFX9: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
+    ; GFX9: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]]
+    ; GFX9: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX9: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
+    ; GFX9: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH18]]
+    ; GFX9: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1)
+    ; GFX9: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
+    ; GFX9: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[ADD21]]
+    ; GFX9: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
+    ; GFX9: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]]
+    ; GFX9: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]]
+    ; GFX9: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]]
+    ; GFX9: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO46]]
+    ; GFX9: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO49]]
+    ; GFX9: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]]
+    ; GFX9: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO48]]
+    ; GFX9: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO48]]
+    ; GFX9: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE10]]
+    ; GFX9: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO48]]
+    ; GFX9: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX9: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]]
+    ; GFX9: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE10]], [[MUL24]]
+    ; GFX9: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO48]], [[ADD27]]
+    ; GFX9: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[MUL24]]
+    ; GFX9: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX9: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
+    ; GFX9: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH21]]
+    ; GFX9: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
+    ; GFX9: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
+    ; GFX9: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE10]], [[ADD27]]
+    ; GFX9: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE10]], [[MUL24]]
+    ; GFX9: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO48]], [[ADD27]]
+    ; GFX9: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX9: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
+    ; GFX9: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH23]]
+    ; GFX9: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1)
+    ; GFX9: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
+    ; GFX9: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[ADD28]]
+    ; GFX9: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
+    ; GFX9: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]]
+    ; GFX9: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE10]], [[ADD27]]
+    ; GFX9: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]]
+    ; GFX9: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[UADDO58]]
+    ; GFX9: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO49]]
+    ; GFX9: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE12]], [[C5]], [[UADDO61]]
+    ; GFX9: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX9: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX9: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDO60]]
+    ; GFX9: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV30]], [[UADDE14]]
+    ; GFX9: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDO60]]
+    ; GFX9: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX9: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
+    ; GFX9: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH25]]
+    ; GFX9: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
+    ; GFX9: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
+    ; GFX9: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV31]], [[UADDE14]]
+    ; GFX9: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDO60]]
+    ; GFX9: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV30]], [[UADDE14]]
+    ; GFX9: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX9: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
+    ; GFX9: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH27]]
+    ; GFX9: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1)
+    ; GFX9: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
+    ; GFX9: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[UADDO68]], [[ADD32]]
+    ; GFX9: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1)
+    ; GFX9: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]]
+    ; GFX9: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV31]], [[UADDE14]]
+    ; GFX9: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]]
+    ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO70]](s32), [[ADD35]](s32)
+    ; GFX9: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX9: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[UADDO70]]
+    ; GFX9: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV33]], [[UADDO70]]
+    ; GFX9: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV32]], [[ADD35]]
+    ; GFX9: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV32]], [[UADDO70]]
+    ; GFX9: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX9: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]]
+    ; GFX9: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV28]], [[MUL33]]
+    ; GFX9: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV29]], [[ADD37]], [[USUBO11]]
+    ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV29]], [[ADD37]]
+    ; GFX9: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX9: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV35]]
+    ; GFX9: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
+    ; GFX9: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV34]]
+    ; GFX9: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1)
+    ; GFX9: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV35]]
+    ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]]
+    ; GFX9: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV34]]
+    ; GFX9: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV35]], [[USUBO11]]
+    ; GFX9: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]]
+    ; GFX9: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX9: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UV36]]
+    ; GFX9: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[ADD35]], [[UV37]], [[UADDO73]]
+    ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO72]](s32), [[UADDE16]](s32)
+    ; GFX9: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV35]]
+    ; GFX9: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
+    ; GFX9: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV34]]
+    ; GFX9: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1)
+    ; GFX9: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV35]]
+    ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
+    ; GFX9: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+    ; GFX9: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[UV38]]
+    ; GFX9: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[UV39]], [[UADDO75]]
+    ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO74]](s32), [[UADDE18]](s32)
+    ; GFX9: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV34]]
+    ; GFX9: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV35]], [[USUBO13]]
+    ; GFX9: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]]
+    ; GFX9: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]]
+    ; GFX9: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]]
+    ; GFX9: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]]
+    ; GFX9: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX9: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C10]]
+    ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[FMUL2]]
+    ; GFX9: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C7]]
+    ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[UITOFP2]]
+    ; GFX9: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+    ; GFX9: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL]]
+    ; GFX9: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+    ; GFX9: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[UITOFP]]
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<2 x s64>) = G_UDIV %0, %1
@@ -968,36 +2291,480 @@ body: |
     ; GFX6-LABEL: name: test_udiv_s33
     ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX6: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64)
-    ; GFX6: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64)
-    ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s33)
-    ; GFX6: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s33)
-    ; GFX6: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[ZEXT]], [[ZEXT1]]
-    ; GFX6: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[UDIV]](s64)
-    ; GFX6: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33)
-    ; GFX6: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]]
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
+    ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY3]], [[C]]
+    ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
+    ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C3]]
+    ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+    ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
+    ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]]
+    ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]]
+    ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]]
+    ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]]
+    ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]]
+    ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]]
+    ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C6]], [[UADDO23]]
+    ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
+    ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
+    ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
+    ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]]
+    ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
+    ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]]
+    ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
+    ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]]
+    ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]]
+    ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]]
+    ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD16]](s32)
+    ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
+    ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
+    ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]]
+    ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
+    ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
+    ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]]
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]]
+    ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
+    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]]
+    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]]
+    ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]]
+    ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX6: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]]
+    ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV15]], [[UADDO35]]
+    ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE6]](s32)
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]]
+    ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]]
+    ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]]
+    ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[UV17]], [[UADDO37]]
+    ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE8]](s32)
+    ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]]
+    ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]]
+    ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SELECT3]](s64)
+    ; GFX6: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
+    ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
+    ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
     ; GFX8-LABEL: name: test_udiv_s33
     ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX8: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64)
-    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64)
-    ; GFX8: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s33)
-    ; GFX8: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s33)
-    ; GFX8: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[ZEXT]], [[ZEXT1]]
-    ; GFX8: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[UDIV]](s64)
-    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33)
-    ; GFX8: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]]
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
+    ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY3]], [[C]]
+    ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
+    ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C3]]
+    ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+    ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+    ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
+    ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]]
+    ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]]
+    ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]]
+    ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]]
+    ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]]
+    ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]]
+    ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C6]], [[UADDO23]]
+    ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
+    ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
+    ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
+    ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]]
+    ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
+    ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]]
+    ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
+    ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]]
+    ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]]
+    ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]]
+    ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD16]](s32)
+    ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
+    ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
+    ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]]
+    ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
+    ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
+    ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]]
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]]
+    ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
+    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]]
+    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]]
+    ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]]
+    ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX8: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX8: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]]
+    ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV15]], [[UADDO35]]
+    ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE6]](s32)
+    ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]]
+    ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]]
+    ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]]
+    ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[UV17]], [[UADDO37]]
+    ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE8]](s32)
+    ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]]
+    ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]]
+    ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SELECT3]](s64)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
+    ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
+    ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
     ; GFX9-LABEL: name: test_udiv_s33
     ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64)
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s33)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s33)
-    ; GFX9: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[ZEXT]], [[ZEXT1]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[UDIV]](s64)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]]
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY3]], [[C]]
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
+    ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C3]]
+    ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+    ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
+    ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]]
+    ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]]
+    ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]]
+    ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]]
+    ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]]
+    ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]]
+    ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C6]], [[UADDO23]]
+    ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
+    ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
+    ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
+    ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]]
+    ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
+    ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]]
+    ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
+    ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]]
+    ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]]
+    ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]]
+    ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO32]](s32), [[ADD16]](s32)
+    ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
+    ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
+    ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]]
+    ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
+    ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
+    ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]]
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]]
+    ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
+    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]]
+    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]]
+    ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]]
+    ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX9: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UV14]]
+    ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV15]], [[UADDO35]]
+    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO34]](s32), [[UADDE6]](s32)
+    ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]]
+    ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]]
+    ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+    ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UV16]]
+    ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[UV17]], [[UADDO37]]
+    ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[UADDE8]](s32)
+    ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]]
+    ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]]
+    ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SELECT3]](s64)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
+    ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
+    ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s33) = G_TRUNC %0

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir
index 973cbb61c588..dfc52a8a7ce5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir
@@ -1,7 +1,7 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer  -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer  -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
 
 ---
 name: test_urem_s32
@@ -291,18 +291,441 @@ body: |
     ; GFX6-LABEL: name: test_urem_s64
     ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX6: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[COPY]], [[COPY1]]
-    ; GFX6: $vgpr0_vgpr1 = COPY [[UREM]](s64)
+    ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
+    ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %8(s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]]
+    ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %16(s32)
+    ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
+    ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]]
+    ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]]
+    ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]]
+    ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]]
+    ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]]
+    ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]]
+    ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]]
+    ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
+    ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]]
+    ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
+    ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]]
+    ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
+    ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]]
+    ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]]
+    ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]]
+    ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
+    ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
+    ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]]
+    ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
+    ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
+    ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]]
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]]
+    ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
+    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]]
+    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]]
+    ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]]
+    ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
+    ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]]
+    ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]]
+    ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]]
+    ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]]
+    ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]]
+    ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
+    ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+    ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
+    ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+    ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
     ; GFX8-LABEL: name: test_urem_s64
     ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX8: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[COPY]], [[COPY1]]
-    ; GFX8: $vgpr0_vgpr1 = COPY [[UREM]](s64)
+    ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
+    ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %8(s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]]
+    ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %16(s32)
+    ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
+    ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]]
+    ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]]
+    ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]]
+    ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]]
+    ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]]
+    ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]]
+    ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]]
+    ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
+    ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]]
+    ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
+    ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]]
+    ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
+    ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]]
+    ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]]
+    ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]]
+    ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
+    ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
+    ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]]
+    ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
+    ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
+    ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]]
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]]
+    ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
+    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]]
+    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]]
+    ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]]
+    ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
+    ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]]
+    ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]]
+    ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]]
+    ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]]
+    ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]]
+    ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
+    ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+    ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
+    ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+    ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
     ; GFX9-LABEL: name: test_urem_s64
     ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[COPY]], [[COPY1]]
-    ; GFX9: $vgpr0_vgpr1 = COPY [[UREM]](s64)
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
+    ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %8(s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]]
+    ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %16(s32)
+    ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
+    ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]]
+    ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]]
+    ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]]
+    ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]]
+    ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]]
+    ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]]
+    ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]]
+    ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
+    ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]]
+    ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
+    ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]]
+    ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
+    ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]]
+    ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]]
+    ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]]
+    ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
+    ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
+    ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]]
+    ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
+    ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
+    ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]]
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]]
+    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
+    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]]
+    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]]
+    ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]]
+    ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
+    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]]
+    ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]]
+    ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]]
+    ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]]
+    ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]]
+    ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
+    ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+    ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
+    ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+    ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = G_UREM %0, %1
@@ -320,28 +743,868 @@ body: |
     ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
     ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; GFX6: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[UV]], [[UV2]]
-    ; GFX6: [[UREM1:%[0-9]+]]:_(s64) = G_UREM [[UV1]], [[UV3]]
-    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UREM]](s64), [[UREM1]](s64)
+    ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32)
+    ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32)
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %234(s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]]
+    ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %242(s32)
+    ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]]
+    ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]]
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]]
+    ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]]
+    ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]]
+    ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]]
+    ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]]
+    ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]]
+    ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]]
+    ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]]
+    ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE4]]
+    ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]]
+    ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE4]]
+    ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]]
+    ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE4]]
+    ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]]
+    ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE4]]
+    ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]]
+    ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]]
+    ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD16]]
+    ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]]
+    ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]]
+    ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD18]], [[USUBO3]]
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD18]]
+    ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]]
+    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]]
+    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]]
+    ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]]
+    ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
+    ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]]
+    ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]]
+    ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV16]]
+    ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]]
+    ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]]
+    ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX6: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV18]](s32)
+    ; GFX6: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV19]](s32)
+    ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+    ; GFX6: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C7]]
+    ; GFX6: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FMUL2]], [[C8]]
+    ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL3]]
+    ; GFX6: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX6: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+    ; GFX6: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
+    ; GFX6: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX6: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX6: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV20]], [[UV22]]
+    ; GFX6: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV21]], [[UV23]], [[USUBO9]]
+    ; GFX6: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]]
+    ; GFX6: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]]
+    ; GFX6: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]]
+    ; GFX6: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]]
+    ; GFX6: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX6: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]]
+    ; GFX6: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
+    ; GFX6: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]]
+    ; GFX6: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
+    ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX6: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH16]]
+    ; GFX6: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX6: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX6: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]]
+    ; GFX6: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
+    ; GFX6: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]]
+    ; GFX6: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX6: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1)
+    ; GFX6: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH18]]
+    ; GFX6: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1)
+    ; GFX6: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
+    ; GFX6: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD21]]
+    ; GFX6: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
+    ; GFX6: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]]
+    ; GFX6: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]]
+    ; GFX6: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]]
+    ; GFX6: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]]
+    ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO45]]
+    ; GFX6: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]]
+    ; GFX6: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO44]]
+    ; GFX6: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]]
+    ; GFX6: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE6]]
+    ; GFX6: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO44]]
+    ; GFX6: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX6: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]]
+    ; GFX6: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE6]], [[MUL24]]
+    ; GFX6: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD27]]
+    ; GFX6: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[MUL24]]
+    ; GFX6: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX6: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
+    ; GFX6: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH21]]
+    ; GFX6: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
+    ; GFX6: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
+    ; GFX6: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE6]], [[ADD27]]
+    ; GFX6: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE6]], [[MUL24]]
+    ; GFX6: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD27]]
+    ; GFX6: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX6: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
+    ; GFX6: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH23]]
+    ; GFX6: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
+    ; GFX6: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
+    ; GFX6: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD28]]
+    ; GFX6: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
+    ; GFX6: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]]
+    ; GFX6: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE6]], [[ADD27]]
+    ; GFX6: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]]
+    ; GFX6: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]]
+    ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO45]]
+    ; GFX6: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[C5]], [[UADDO57]]
+    ; GFX6: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX6: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX6: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO56]]
+    ; GFX6: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[UADDE10]]
+    ; GFX6: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDO56]]
+    ; GFX6: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX6: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
+    ; GFX6: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH25]]
+    ; GFX6: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
+    ; GFX6: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
+    ; GFX6: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE10]]
+    ; GFX6: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDO56]]
+    ; GFX6: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDE10]]
+    ; GFX6: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX6: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
+    ; GFX6: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH27]]
+    ; GFX6: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
+    ; GFX6: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
+    ; GFX6: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD32]]
+    ; GFX6: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
+    ; GFX6: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]]
+    ; GFX6: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDE10]]
+    ; GFX6: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]]
+    ; GFX6: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX6: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDO66]]
+    ; GFX6: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO66]]
+    ; GFX6: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[ADD35]]
+    ; GFX6: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO66]]
+    ; GFX6: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX6: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]]
+    ; GFX6: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[MUL33]]
+    ; GFX6: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[ADD37]], [[USUBO11]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV25]], [[ADD37]]
+    ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32)
+    ; GFX6: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX6: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV31]]
+    ; GFX6: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
+    ; GFX6: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV30]]
+    ; GFX6: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1)
+    ; GFX6: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV31]]
+    ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]]
+    ; GFX6: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV30]]
+    ; GFX6: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV31]], [[USUBO11]]
+    ; GFX6: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]]
+    ; GFX6: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE18]](s32)
+    ; GFX6: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV31]]
+    ; GFX6: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
+    ; GFX6: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV30]]
+    ; GFX6: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1)
+    ; GFX6: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV31]]
+    ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
+    ; GFX6: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV30]]
+    ; GFX6: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV31]], [[USUBO13]]
+    ; GFX6: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]]
+    ; GFX6: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE22]](s32)
+    ; GFX6: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]]
+    ; GFX6: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]]
+    ; GFX6: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]]
+    ; GFX6: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]]
+    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64)
     ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX6: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C9]]
+    ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[FMUL2]]
+    ; GFX6: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C6]]
+    ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[UITOFP2]]
+    ; GFX6: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+    ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL]]
+    ; GFX6: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+    ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[UITOFP]]
     ; GFX8-LABEL: name: test_urem_v2s64
     ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
     ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; GFX8: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[UV]], [[UV2]]
-    ; GFX8: [[UREM1:%[0-9]+]]:_(s64) = G_UREM [[UV1]], [[UV3]]
-    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UREM]](s64), [[UREM1]](s64)
+    ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32)
+    ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32)
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %234(s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]]
+    ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %242(s32)
+    ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]]
+    ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]]
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]]
+    ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]]
+    ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]]
+    ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]]
+    ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]]
+    ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]]
+    ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]]
+    ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]]
+    ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE4]]
+    ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]]
+    ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE4]]
+    ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]]
+    ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE4]]
+    ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]]
+    ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE4]]
+    ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]]
+    ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]]
+    ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD16]]
+    ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]]
+    ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]]
+    ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD18]], [[USUBO3]]
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD18]]
+    ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]]
+    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]]
+    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]]
+    ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]]
+    ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
+    ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]]
+    ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]]
+    ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV16]]
+    ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]]
+    ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]]
+    ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX8: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV18]](s32)
+    ; GFX8: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV19]](s32)
+    ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+    ; GFX8: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C7]]
+    ; GFX8: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FMUL2]], [[C8]]
+    ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL3]]
+    ; GFX8: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX8: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+    ; GFX8: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
+    ; GFX8: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX8: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX8: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV20]], [[UV22]]
+    ; GFX8: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV21]], [[UV23]], [[USUBO9]]
+    ; GFX8: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]]
+    ; GFX8: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]]
+    ; GFX8: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]]
+    ; GFX8: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]]
+    ; GFX8: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX8: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]]
+    ; GFX8: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
+    ; GFX8: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]]
+    ; GFX8: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
+    ; GFX8: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX8: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH16]]
+    ; GFX8: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX8: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX8: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]]
+    ; GFX8: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
+    ; GFX8: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]]
+    ; GFX8: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX8: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1)
+    ; GFX8: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH18]]
+    ; GFX8: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1)
+    ; GFX8: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
+    ; GFX8: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD21]]
+    ; GFX8: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
+    ; GFX8: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]]
+    ; GFX8: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]]
+    ; GFX8: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]]
+    ; GFX8: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]]
+    ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO45]]
+    ; GFX8: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]]
+    ; GFX8: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO44]]
+    ; GFX8: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]]
+    ; GFX8: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE6]]
+    ; GFX8: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO44]]
+    ; GFX8: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX8: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]]
+    ; GFX8: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE6]], [[MUL24]]
+    ; GFX8: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD27]]
+    ; GFX8: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[MUL24]]
+    ; GFX8: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX8: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
+    ; GFX8: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH21]]
+    ; GFX8: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
+    ; GFX8: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
+    ; GFX8: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE6]], [[ADD27]]
+    ; GFX8: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE6]], [[MUL24]]
+    ; GFX8: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD27]]
+    ; GFX8: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX8: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
+    ; GFX8: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH23]]
+    ; GFX8: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
+    ; GFX8: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
+    ; GFX8: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD28]]
+    ; GFX8: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
+    ; GFX8: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]]
+    ; GFX8: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE6]], [[ADD27]]
+    ; GFX8: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]]
+    ; GFX8: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]]
+    ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO45]]
+    ; GFX8: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[C5]], [[UADDO57]]
+    ; GFX8: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX8: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX8: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO56]]
+    ; GFX8: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[UADDE10]]
+    ; GFX8: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDO56]]
+    ; GFX8: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX8: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
+    ; GFX8: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH25]]
+    ; GFX8: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
+    ; GFX8: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
+    ; GFX8: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE10]]
+    ; GFX8: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDO56]]
+    ; GFX8: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDE10]]
+    ; GFX8: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX8: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
+    ; GFX8: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH27]]
+    ; GFX8: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
+    ; GFX8: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
+    ; GFX8: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD32]]
+    ; GFX8: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
+    ; GFX8: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]]
+    ; GFX8: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDE10]]
+    ; GFX8: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]]
+    ; GFX8: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX8: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDO66]]
+    ; GFX8: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO66]]
+    ; GFX8: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[ADD35]]
+    ; GFX8: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO66]]
+    ; GFX8: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX8: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]]
+    ; GFX8: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[MUL33]]
+    ; GFX8: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[ADD37]], [[USUBO11]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV25]], [[ADD37]]
+    ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32)
+    ; GFX8: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX8: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV31]]
+    ; GFX8: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
+    ; GFX8: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV30]]
+    ; GFX8: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1)
+    ; GFX8: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV31]]
+    ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]]
+    ; GFX8: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV30]]
+    ; GFX8: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV31]], [[USUBO11]]
+    ; GFX8: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]]
+    ; GFX8: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE18]](s32)
+    ; GFX8: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV31]]
+    ; GFX8: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
+    ; GFX8: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV30]]
+    ; GFX8: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1)
+    ; GFX8: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV31]]
+    ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
+    ; GFX8: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV30]]
+    ; GFX8: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV31]], [[USUBO13]]
+    ; GFX8: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]]
+    ; GFX8: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE22]](s32)
+    ; GFX8: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]]
+    ; GFX8: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]]
+    ; GFX8: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]]
+    ; GFX8: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]]
+    ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64)
     ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX8: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C9]]
+    ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[FMUL2]]
+    ; GFX8: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C6]]
+    ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[UITOFP2]]
+    ; GFX8: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+    ; GFX8: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL]]
+    ; GFX8: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+    ; GFX8: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[UITOFP]]
     ; GFX9-LABEL: name: test_urem_v2s64
     ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
     ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; GFX9: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[UV]], [[UV2]]
-    ; GFX9: [[UREM1:%[0-9]+]]:_(s64) = G_UREM [[UV1]], [[UV3]]
-    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UREM]](s64), [[UREM1]](s64)
+    ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32)
+    ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %234(s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]]
+    ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %242(s32)
+    ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[UV8]]
+    ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[UV9]], [[USUBO1]]
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]]
+    ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]]
+    ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]]
+    ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]]
+    ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]]
+    ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]]
+    ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C5]], [[UADDO23]]
+    ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDO22]]
+    ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV12]], [[UADDE4]]
+    ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDO22]]
+    ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV13]], [[UADDE4]]
+    ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDO22]]
+    ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV12]], [[UADDE4]]
+    ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]]
+    ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV13]], [[UADDE4]]
+    ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[UADDO32]]
+    ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV15]], [[UADDO32]]
+    ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV14]], [[ADD16]]
+    ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV14]], [[UADDO32]]
+    ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[MUL15]]
+    ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[ADD18]], [[USUBO3]]
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV11]], [[ADD18]]
+    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV17]]
+    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV16]]
+    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV17]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV16]]
+    ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV17]], [[USUBO3]]
+    ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C5]], [[USUBO5]]
+    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV17]]
+    ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV16]]
+    ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV17]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV16]]
+    ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV17]], [[USUBO5]]
+    ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C5]], [[USUBO7]]
+    ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C5]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX9: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV18]](s32)
+    ; GFX9: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV19]](s32)
+    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+    ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C7]]
+    ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FMUL2]], [[C8]]
+    ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL3]]
+    ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX9: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+    ; GFX9: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
+    ; GFX9: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+    ; GFX9: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX9: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV20]], [[UV22]]
+    ; GFX9: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV21]], [[UV23]], [[USUBO9]]
+    ; GFX9: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI2]]
+    ; GFX9: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[FPTOUI2]]
+    ; GFX9: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[FPTOUI3]]
+    ; GFX9: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[FPTOUI2]]
+    ; GFX9: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]]
+    ; GFX9: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]]
+    ; GFX9: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]]
+    ; GFX9: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]]
+    ; GFX9: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]]
+    ; GFX9: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]]
+    ; GFX9: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
+    ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[UMULH16]]
+    ; GFX9: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
+    ; GFX9: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]]
+    ; GFX9: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]]
+    ; GFX9: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]]
+    ; GFX9: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]]
+    ; GFX9: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]]
+    ; GFX9: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO39]](s1)
+    ; GFX9: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UMULH18]]
+    ; GFX9: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO41]](s1)
+    ; GFX9: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]]
+    ; GFX9: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UADDO40]], [[ADD21]]
+    ; GFX9: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1)
+    ; GFX9: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]]
+    ; GFX9: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]]
+    ; GFX9: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]]
+    ; GFX9: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO42]]
+    ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO45]]
+    ; GFX9: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]]
+    ; GFX9: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDO44]]
+    ; GFX9: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE12]], [[UADDO44]]
+    ; GFX9: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO8]], [[UADDE6]]
+    ; GFX9: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO8]], [[UADDO44]]
+    ; GFX9: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]]
+    ; GFX9: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]]
+    ; GFX9: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE6]], [[MUL24]]
+    ; GFX9: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO44]], [[ADD27]]
+    ; GFX9: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[MUL24]]
+    ; GFX9: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]]
+    ; GFX9: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1)
+    ; GFX9: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH21]]
+    ; GFX9: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1)
+    ; GFX9: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]]
+    ; GFX9: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE6]], [[ADD27]]
+    ; GFX9: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE6]], [[MUL24]]
+    ; GFX9: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO44]], [[ADD27]]
+    ; GFX9: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]]
+    ; GFX9: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1)
+    ; GFX9: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH23]]
+    ; GFX9: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1)
+    ; GFX9: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]]
+    ; GFX9: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD28]]
+    ; GFX9: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1)
+    ; GFX9: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]]
+    ; GFX9: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE6]], [[ADD27]]
+    ; GFX9: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]]
+    ; GFX9: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO44]], [[UADDO54]]
+    ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO45]]
+    ; GFX9: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[C5]], [[UADDO57]]
+    ; GFX9: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX9: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GFX9: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDO56]]
+    ; GFX9: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV26]], [[UADDE10]]
+    ; GFX9: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDO56]]
+    ; GFX9: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]]
+    ; GFX9: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1)
+    ; GFX9: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH25]]
+    ; GFX9: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1)
+    ; GFX9: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]]
+    ; GFX9: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV27]], [[UADDE10]]
+    ; GFX9: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDO56]]
+    ; GFX9: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV26]], [[UADDE10]]
+    ; GFX9: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]]
+    ; GFX9: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1)
+    ; GFX9: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH27]]
+    ; GFX9: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1)
+    ; GFX9: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]]
+    ; GFX9: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD32]]
+    ; GFX9: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1)
+    ; GFX9: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]]
+    ; GFX9: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV27]], [[UADDE10]]
+    ; GFX9: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]]
+    ; GFX9: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX9: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[UADDO66]]
+    ; GFX9: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV29]], [[UADDO66]]
+    ; GFX9: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV28]], [[ADD35]]
+    ; GFX9: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV28]], [[UADDO66]]
+    ; GFX9: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]]
+    ; GFX9: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]]
+    ; GFX9: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV24]], [[MUL33]]
+    ; GFX9: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV25]], [[ADD37]], [[USUBO11]]
+    ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV25]], [[ADD37]]
+    ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO10]](s32), [[USUBE14]](s32)
+    ; GFX9: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GFX9: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE14]](s32), [[UV31]]
+    ; GFX9: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1)
+    ; GFX9: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO10]](s32), [[UV30]]
+    ; GFX9: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1)
+    ; GFX9: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE14]](s32), [[UV31]]
+    ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]]
+    ; GFX9: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[USUBO10]], [[UV30]]
+    ; GFX9: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV31]], [[USUBO11]]
+    ; GFX9: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[C5]], [[USUBO13]]
+    ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE18]](s32)
+    ; GFX9: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE18]](s32), [[UV31]]
+    ; GFX9: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1)
+    ; GFX9: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV30]]
+    ; GFX9: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1)
+    ; GFX9: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE18]](s32), [[UV31]]
+    ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]]
+    ; GFX9: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV30]]
+    ; GFX9: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE16]], [[UV31]], [[USUBO13]]
+    ; GFX9: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE20]], [[C5]], [[USUBO15]]
+    ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE22]](s32)
+    ; GFX9: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C5]]
+    ; GFX9: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV5]], [[MV4]]
+    ; GFX9: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C5]]
+    ; GFX9: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64)
     ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX9: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C9]]
+    ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[FMUL2]]
+    ; GFX9: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C6]]
+    ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[UITOFP2]]
+    ; GFX9: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+    ; GFX9: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL]]
+    ; GFX9: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+    ; GFX9: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[UITOFP]]
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
     %2:_(<2 x s64>) = G_UREM %0, %1
@@ -950,36 +2213,459 @@ body: |
     ; GFX6-LABEL: name: test_urem_s33
     ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX6: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64)
-    ; GFX6: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64)
-    ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s33)
-    ; GFX6: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s33)
-    ; GFX6: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[ZEXT]], [[ZEXT1]]
-    ; GFX6: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[UREM]](s64)
-    ; GFX6: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33)
-    ; GFX6: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]]
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
+    ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY3]], [[C]]
+    ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
+    ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C3]]
+    ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+    ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
+    ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]]
+    ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]]
+    ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]]
+    ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]]
+    ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]]
+    ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]]
+    ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C6]], [[UADDO23]]
+    ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
+    ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
+    ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
+    ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]]
+    ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
+    ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]]
+    ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
+    ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]]
+    ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]]
+    ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]]
+    ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
+    ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
+    ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]]
+    ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
+    ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
+    ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]]
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]]
+    ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
+    ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]]
+    ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]]
+    ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]]
+    ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]]
+    ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]]
+    ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]]
+    ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]]
+    ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX6: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SELECT3]](s64)
+    ; GFX6: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
+    ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
+    ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
     ; GFX8-LABEL: name: test_urem_s33
     ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX8: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64)
-    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64)
-    ; GFX8: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s33)
-    ; GFX8: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s33)
-    ; GFX8: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[ZEXT]], [[ZEXT1]]
-    ; GFX8: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[UREM]](s64)
-    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33)
-    ; GFX8: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]]
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
+    ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY3]], [[C]]
+    ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
+    ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C3]]
+    ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+    ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+    ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
+    ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]]
+    ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]]
+    ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]]
+    ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]]
+    ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]]
+    ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]]
+    ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C6]], [[UADDO23]]
+    ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
+    ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
+    ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
+    ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]]
+    ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
+    ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]]
+    ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
+    ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]]
+    ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]]
+    ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]]
+    ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
+    ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
+    ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]]
+    ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
+    ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
+    ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]]
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]]
+    ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
+    ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]]
+    ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]]
+    ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]]
+    ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]]
+    ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]]
+    ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]]
+    ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]]
+    ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX8: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SELECT3]](s64)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
+    ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
+    ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
     ; GFX9-LABEL: name: test_urem_s33
     ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64)
-    ; GFX9: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s33)
-    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s33)
-    ; GFX9: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[ZEXT]], [[ZEXT1]]
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[UREM]](s64)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33)
-    ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]]
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY3]], [[C]]
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
+    ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
+    ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
+    ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
+    ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C3]]
+    ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+    ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
+    ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+    ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
+    ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+    ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV2]], [[UV4]]
+    ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV3]], [[UV5]], [[USUBO1]]
+    ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
+    ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
+    ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
+    ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
+    ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH1]]
+    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
+    ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+    ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
+    ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
+    ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+    ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
+    ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH3]]
+    ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
+    ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
+    ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD2]]
+    ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
+    ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+    ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
+    ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
+    ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO8]]
+    ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO11]]
+    ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]]
+    ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO10]]
+    ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO10]]
+    ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE]]
+    ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO10]]
+    ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+    ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]]
+    ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[MUL6]]
+    ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO10]], [[ADD8]]
+    ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[MUL6]]
+    ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+    ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
+    ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO12]], [[UMULH6]]
+    ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1)
+    ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
+    ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE]], [[ADD8]]
+    ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[MUL6]]
+    ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO10]], [[ADD8]]
+    ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+    ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
+    ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH8]]
+    ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
+    ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
+    ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[UADDO18]], [[ADD9]]
+    ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
+    ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]]
+    ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE]], [[ADD8]]
+    ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]]
+    ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UADDO20]]
+    ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO11]]
+    ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDE2]], [[C6]], [[UADDO23]]
+    ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
+    ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64)
+    ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDO22]]
+    ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UADDE4]]
+    ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDO22]]
+    ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+    ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
+    ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO24]], [[UMULH10]]
+    ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO27]](s1)
+    ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
+    ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UADDE4]]
+    ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDO22]]
+    ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UADDE4]]
+    ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+    ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
+    ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH12]]
+    ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
+    ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
+    ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[UADDO30]], [[ADD13]]
+    ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
+    ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]]
+    ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UADDE4]]
+    ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]]
+    ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[UADDO32]]
+    ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV11]], [[UADDO32]]
+    ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV10]], [[ADD16]]
+    ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV10]], [[UADDO32]]
+    ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+    ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]]
+    ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV6]], [[MUL15]]
+    ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV7]], [[ADD18]], [[USUBO3]]
+    ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV7]], [[ADD18]]
+    ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32)
+    ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](s64)
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV13]]
+    ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV12]]
+    ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1)
+    ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV13]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]]
+    ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV12]]
+    ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV13]], [[USUBO3]]
+    ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]]
+    ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32)
+    ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV13]]
+    ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1)
+    ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV12]]
+    ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1)
+    ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV13]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]]
+    ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV12]]
+    ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV13]], [[USUBO5]]
+    ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]]
+    ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32)
+    ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]]
+    ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV2]], [[MV1]]
+    ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]]
+    ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SELECT3]](s64)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
+    ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+    ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
+    ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+    ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s33) = G_TRUNC %0

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
new file mode 100644
index 000000000000..27448be1a419
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -0,0 +1,3456 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,CGP %s
+
+; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
+
+define i64 @v_udiv_i64(i64 %num, i64 %den) {
+; CHECK-LABEL: v_udiv_i64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_or_b32_e32 v5, v1, v3
+; CHECK-NEXT:    v_mov_b32_e32 v4, 0
+; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CHECK-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CHECK-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CHECK-NEXT:    s_cbranch_execnz BB0_2
+; CHECK-NEXT:  ; %bb.1:
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v2
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v3
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v2
+; CHECK-NEXT:    v_subb_u32_e32 v7, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CHECK-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CHECK-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; CHECK-NEXT:    v_trunc_f32_e32 v5, v5
+; CHECK-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v5
+; CHECK-NEXT:    v_mul_lo_u32 v9, v6, v4
+; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v4
+; CHECK-NEXT:    v_mul_hi_u32 v11, v6, v4
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; CHECK-NEXT:    v_mul_lo_u32 v10, v5, v9
+; CHECK-NEXT:    v_mul_hi_u32 v12, v4, v9
+; CHECK-NEXT:    v_mul_hi_u32 v9, v5, v9
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; CHECK-NEXT:    v_mul_lo_u32 v11, v4, v8
+; CHECK-NEXT:    v_mul_lo_u32 v13, v5, v8
+; CHECK-NEXT:    v_mul_hi_u32 v14, v4, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v5, v8
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; CHECK-NEXT:    v_addc_u32_e64 v9, s[4:5], v5, v8, vcc
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v8
+; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v4
+; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v4
+; CHECK-NEXT:    v_mul_hi_u32 v10, v6, v4
+; CHECK-NEXT:    v_mul_lo_u32 v6, v6, v9
+; CHECK-NEXT:    v_mul_lo_u32 v11, v9, v8
+; CHECK-NEXT:    v_mul_hi_u32 v12, v4, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v9, v8
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
+; CHECK-NEXT:    v_mul_lo_u32 v7, v4, v6
+; CHECK-NEXT:    v_mul_lo_u32 v10, v9, v6
+; CHECK-NEXT:    v_mul_hi_u32 v13, v4, v6
+; CHECK-NEXT:    v_mul_hi_u32 v6, v9, v6
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v10, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v7
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v11
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v5, v6, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v6, v1, v4
+; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v4
+; CHECK-NEXT:    v_mul_lo_u32 v8, v0, v5
+; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v5
+; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v1, v5
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v4
+; CHECK-NEXT:    v_mul_lo_u32 v8, v3, v4
+; CHECK-NEXT:    v_mul_hi_u32 v9, v2, v4
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT:    v_mul_lo_u32 v6, v2, v5
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, 1, v4
+; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, 0, v5, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v10
+; CHECK-NEXT:    v_addc_u32_e32 v12, vcc, 0, v11, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
+; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v0, v7
+; CHECK-NEXT:    v_subb_u32_e64 v9, s[4:5], v1, v6, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v6
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v6, v13, v6, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v7, v2
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v9, v7, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v10, v8, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v11, v12, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
+; CHECK-NEXT:  BB0_2: ; %Flow
+; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
+; CHECK-NEXT:    s_xor_b64 exec, exec, s[6:7]
+; CHECK-NEXT:    s_cbranch_execz BB0_4
+; CHECK-NEXT:  ; %bb.3:
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v2
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v1
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_lo_u32 v3, v1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v2
+; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, 0, v3
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v1
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v1, v3
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT:    v_mul_lo_u32 v3, v1, v2
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v1
+; CHECK-NEXT:    v_subrev_i32_e32 v5, vcc, 1, v1
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, v0, v3
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v2
+; CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v4, v1, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v5, v0, vcc
+; CHECK-NEXT:    v_mov_b32_e32 v5, 0
+; CHECK-NEXT:  BB0_4:
+; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
+; CHECK-NEXT:    v_mov_b32_e32 v0, v4
+; CHECK-NEXT:    v_mov_b32_e32 v1, v5
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %result = udiv i64 %num, %den
+  ret i64 %result
+}
+
+; FIXME: This is a workaround for not handling uniform VGPR case.
+declare i32 @llvm.amdgcn.readfirstlane(i32)
+
+define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
+; CHECK-LABEL: s_udiv_i64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_or_b64 s[4:5], s[0:1], s[2:3]
+; CHECK-NEXT:    s_mov_b32 s6, 0
+; CHECK-NEXT:    s_mov_b32 s7, -1
+; CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s2
+; CHECK-NEXT:    v_cmp_ne_u64_e64 vcc, s[4:5], 0
+; CHECK-NEXT:    s_cbranch_vccnz BB1_2
+; CHECK-NEXT:  ; %bb.1:
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CHECK-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CHECK-NEXT:    v_mul_lo_u32 v1, v0, s2
+; CHECK-NEXT:    v_mul_hi_u32 v2, v0, s2
+; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], v0, v1
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v0, v0, s0
+; CHECK-NEXT:    v_mul_lo_u32 v1, v0, s2
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
+; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, 1, v0
+; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, s0, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, s0, v1
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v4
+; CHECK-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[0:1]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
+; CHECK-NEXT:    s_branch BB1_3
+; CHECK-NEXT:  BB1_2:
+; CHECK-NEXT:    v_mov_b32_e32 v1, s3
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s3
+; CHECK-NEXT:    s_sub_u32 s6, 0, s2
+; CHECK-NEXT:    s_cselect_b32 s4, 1, 0
+; CHECK-NEXT:    v_mov_b32_e32 v3, s1
+; CHECK-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v2
+; CHECK-NEXT:    s_and_b32 s4, s4, 1
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
+; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
+; CHECK-NEXT:    s_subb_u32 s7, 0, s3
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v0
+; CHECK-NEXT:    v_trunc_f32_e32 v2, v2
+; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v2
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v0
+; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v0
+; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v0
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CHECK-NEXT:    v_mul_lo_u32 v6, v2, v5
+; CHECK-NEXT:    v_mul_hi_u32 v8, v0, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v2, v5
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v4
+; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v4
+; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v2, v4
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v8
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v5
+; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], v2, v4, vcc
+; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], v2, v4
+; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v0
+; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v0
+; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v0
+; CHECK-NEXT:    v_mul_lo_u32 v8, s6, v5
+; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v4
+; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
+; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v6
+; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v6
+; CHECK-NEXT:    v_mul_hi_u32 v11, v0, v6
+; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v6
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v9, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v8, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v9
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v5, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v4, s1, v0
+; CHECK-NEXT:    v_mul_hi_u32 v5, s0, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, s1, v0
+; CHECK-NEXT:    v_mul_lo_u32 v6, s0, v2
+; CHECK-NEXT:    v_mul_lo_u32 v7, s1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v8, s0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, s1, v2
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; CHECK-NEXT:    v_mul_lo_u32 v5, s2, v0
+; CHECK-NEXT:    v_mul_lo_u32 v6, s3, v0
+; CHECK-NEXT:    v_mul_hi_u32 v7, s2, v0
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v0
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v8
+; CHECK-NEXT:    v_mul_lo_u32 v2, s2, v2
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
+; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, s0, v5
+; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v2, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v2, s[0:1], s1, v2
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[0:1]
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[0:1]
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v2, v1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v7, v6, vcc
+; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, s2, v5
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s3, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v5, v3, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v8, v4, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; CHECK-NEXT:  BB1_3:
+; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
+; CHECK-NEXT:    s_mov_b32 s1, s0
+; CHECK-NEXT:    ; return to shader part epilog
+  %result = udiv i64 %num, %den
+  %cast = bitcast i64 %result to <2 x i32>
+  %elt.0 = extractelement <2 x i32> %cast, i32 0
+  %elt.1 = extractelement <2 x i32> %cast, i32 1
+  %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0)
+  %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1)
+  %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0
+  %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1
+  %cast.back = bitcast <2 x i32> %ins.1 to i64
+  ret i64 %cast.back
+}
+
+define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
+; GISEL-LABEL: v_udiv_v2i64:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v4
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v5
+; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v9
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
+; GISEL-NEXT:    v_mul_f32_e32 v8, 0x5f7ffffc, v8
+; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v8
+; GISEL-NEXT:    v_trunc_f32_e32 v9, v9
+; GISEL-NEXT:    v_mac_f32_e32 v8, 0xcf800000, v9
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
+; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v4
+; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
+; GISEL-NEXT:    v_mul_lo_u32 v13, v11, v8
+; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v9
+; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v8
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; GISEL-NEXT:    v_mul_lo_u32 v14, v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v13
+; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v12
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; GISEL-NEXT:    v_mul_lo_u32 v15, v9, v13
+; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
+; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v13
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v13
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], v9, v13, vcc
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
+; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v8
+; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v8
+; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v12
+; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v8
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
+; GISEL-NEXT:    v_mul_lo_u32 v11, v12, v13
+; GISEL-NEXT:    v_mul_lo_u32 v14, v8, v10
+; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v13
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v14, v11
+; GISEL-NEXT:    v_mul_lo_u32 v14, v12, v10
+; GISEL-NEXT:    v_mul_hi_u32 v13, v12, v13
+; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v10
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v15
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v10
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v10, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, 0, v9, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v8
+; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v9
+; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v8
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v9
+; GISEL-NEXT:    v_mul_hi_u32 v8, v1, v8
+; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v9
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v8
+; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v8
+; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v8
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
+; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], v1, v11, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v11
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v10, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, v11, v12, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v4
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
+; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v8
+; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, 0, v9, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v13, v0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v11
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, 0, v12, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v11, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v12, v4, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v7
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, 0, v6
+; GISEL-NEXT:    v_subb_u32_e32 v9, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v5
+; GISEL-NEXT:    v_mul_hi_u32 v13, v8, v4
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
+; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v10
+; GISEL-NEXT:    v_mul_lo_u32 v13, v4, v11
+; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v10
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; GISEL-NEXT:    v_mul_lo_u32 v13, v5, v11
+; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v10
+; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v11
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v11
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v5, v11, vcc
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v11
+; GISEL-NEXT:    v_mul_lo_u32 v11, v8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v10
+; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v4
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
+; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v11
+; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v11
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
+; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
+; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v11
+; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v8
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v9
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v5, vcc, 0, v5, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v5
+; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v5
+; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, v7, v4
+; GISEL-NEXT:    v_mul_lo_u32 v10, v6, v5
+; GISEL-NEXT:    v_mul_hi_u32 v11, v6, v4
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
+; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], v3, v9, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v9
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v8, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v9, v10, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v6
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v11, v2, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v9
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v10, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v9, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v10, v6, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_udiv_v2i64:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    v_mov_b32_e32 v8, v0
+; CGP-NEXT:    v_mov_b32_e32 v9, v1
+; CGP-NEXT:    v_or_b32_e32 v1, v9, v5
+; CGP-NEXT:    v_mov_b32_e32 v0, 0
+; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execnz BB2_2
+; CGP-NEXT:  ; %bb.1:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
+; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v5
+; CGP-NEXT:    v_sub_i32_e32 v10, vcc, 0, v4
+; CGP-NEXT:    v_subb_u32_e32 v11, vcc, 0, v5, vcc
+; CGP-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
+; CGP-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
+; CGP-NEXT:    v_trunc_f32_e32 v1, v1
+; CGP-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; CGP-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v10, v1
+; CGP-NEXT:    v_mul_lo_u32 v13, v10, v0
+; CGP-NEXT:    v_mul_lo_u32 v14, v11, v0
+; CGP-NEXT:    v_mul_hi_u32 v15, v10, v0
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
+; CGP-NEXT:    v_mul_lo_u32 v14, v1, v13
+; CGP-NEXT:    v_mul_hi_u32 v16, v0, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v1, v13
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; CGP-NEXT:    v_mul_lo_u32 v15, v0, v12
+; CGP-NEXT:    v_mul_lo_u32 v17, v1, v12
+; CGP-NEXT:    v_mul_hi_u32 v18, v0, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v1, v12
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v17, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v17, v16
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
+; CGP-NEXT:    v_addc_u32_e64 v13, s[4:5], v1, v12, vcc
+; CGP-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v12
+; CGP-NEXT:    v_mul_lo_u32 v12, v10, v0
+; CGP-NEXT:    v_mul_lo_u32 v11, v11, v0
+; CGP-NEXT:    v_mul_hi_u32 v14, v10, v0
+; CGP-NEXT:    v_mul_lo_u32 v10, v10, v13
+; CGP-NEXT:    v_mul_lo_u32 v15, v13, v12
+; CGP-NEXT:    v_mul_hi_u32 v16, v0, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v13, v12
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v14
+; CGP-NEXT:    v_mul_lo_u32 v11, v0, v10
+; CGP-NEXT:    v_mul_lo_u32 v14, v13, v10
+; CGP-NEXT:    v_mul_hi_u32 v17, v0, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v13, v10
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v15, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v15
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v12
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v10, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v11
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v10, v9, v0
+; CGP-NEXT:    v_mul_hi_u32 v11, v8, v0
+; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v8, v1
+; CGP-NEXT:    v_mul_lo_u32 v13, v9, v1
+; CGP-NEXT:    v_mul_hi_u32 v14, v8, v1
+; CGP-NEXT:    v_mul_hi_u32 v1, v9, v1
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_mul_lo_u32 v11, v4, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v5, v0
+; CGP-NEXT:    v_mul_hi_u32 v13, v4, v0
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v10
+; CGP-NEXT:    v_mul_lo_u32 v10, v4, v1
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, 1, v0
+; CGP-NEXT:    v_addc_u32_e32 v15, vcc, 0, v1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, 1, v14
+; CGP-NEXT:    v_addc_u32_e32 v16, vcc, 0, v15, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
+; CGP-NEXT:    v_sub_i32_e32 v11, vcc, v8, v11
+; CGP-NEXT:    v_subb_u32_e64 v13, s[4:5], v9, v10, vcc
+; CGP-NEXT:    v_sub_i32_e64 v9, s[4:5], v9, v10
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v13, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, -1, s[4:5]
+; CGP-NEXT:    v_subb_u32_e32 v9, vcc, v9, v5, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v13, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v10, v17, v10, vcc
+; CGP-NEXT:    v_sub_i32_e32 v11, vcc, v11, v4
+; CGP-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v9, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v13, v11, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v14, v12, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v9, v15, v16, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; CGP-NEXT:  BB2_2: ; %Flow1
+; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
+; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execz BB2_4
+; CGP-NEXT:  ; %bb.3:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
+; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_lo_u32 v1, v0, v4
+; CGP-NEXT:    v_mul_hi_u32 v5, v0, v4
+; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v1
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; CGP-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v0, v1
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; CGP-NEXT:    v_mul_hi_u32 v0, v0, v8
+; CGP-NEXT:    v_mul_lo_u32 v1, v0, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v0
+; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, 1, v0
+; CGP-NEXT:    v_sub_i32_e32 v10, vcc, v8, v1
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v1
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v4
+; CGP-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v9, v0, vcc
+; CGP-NEXT:    v_mov_b32_e32 v1, 0
+; CGP-NEXT:  BB2_4:
+; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
+; CGP-NEXT:    v_or_b32_e32 v5, v3, v7
+; CGP-NEXT:    v_mov_b32_e32 v4, 0
+; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execnz BB2_6
+; CGP-NEXT:  ; %bb.5:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v6
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v7
+; CGP-NEXT:    v_sub_i32_e32 v8, vcc, 0, v6
+; CGP-NEXT:    v_subb_u32_e32 v9, vcc, 0, v7, vcc
+; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; CGP-NEXT:    v_trunc_f32_e32 v5, v5
+; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, v8, v5
+; CGP-NEXT:    v_mul_lo_u32 v11, v8, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, v9, v4
+; CGP-NEXT:    v_mul_hi_u32 v13, v8, v4
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; CGP-NEXT:    v_mul_lo_u32 v12, v5, v11
+; CGP-NEXT:    v_mul_hi_u32 v14, v4, v11
+; CGP-NEXT:    v_mul_hi_u32 v11, v5, v11
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
+; CGP-NEXT:    v_mul_lo_u32 v13, v4, v10
+; CGP-NEXT:    v_mul_lo_u32 v15, v5, v10
+; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v5, v10
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v15, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v14
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
+; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], v5, v10, vcc
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v10
+; CGP-NEXT:    v_mul_lo_u32 v10, v8, v4
+; CGP-NEXT:    v_mul_lo_u32 v9, v9, v4
+; CGP-NEXT:    v_mul_hi_u32 v12, v8, v4
+; CGP-NEXT:    v_mul_lo_u32 v8, v8, v11
+; CGP-NEXT:    v_mul_lo_u32 v13, v11, v10
+; CGP-NEXT:    v_mul_hi_u32 v14, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v11, v10
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v12
+; CGP-NEXT:    v_mul_lo_u32 v9, v4, v8
+; CGP-NEXT:    v_mul_lo_u32 v12, v11, v8
+; CGP-NEXT:    v_mul_hi_u32 v15, v4, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v11, v8
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v13, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v13
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; CGP-NEXT:    v_mul_lo_u32 v8, v3, v4
+; CGP-NEXT:    v_mul_hi_u32 v9, v2, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, v2, v5
+; CGP-NEXT:    v_mul_lo_u32 v11, v3, v5
+; CGP-NEXT:    v_mul_hi_u32 v12, v2, v5
+; CGP-NEXT:    v_mul_hi_u32 v5, v3, v5
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CGP-NEXT:    v_mul_lo_u32 v9, v6, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, v7, v4
+; CGP-NEXT:    v_mul_hi_u32 v11, v6, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; CGP-NEXT:    v_mul_lo_u32 v8, v6, v5
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, 1, v4
+; CGP-NEXT:    v_addc_u32_e32 v13, vcc, 0, v5, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v12
+; CGP-NEXT:    v_addc_u32_e32 v14, vcc, 0, v13, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT:    v_sub_i32_e32 v9, vcc, v2, v9
+; CGP-NEXT:    v_subb_u32_e64 v11, s[4:5], v3, v8, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v8
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s[4:5]
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v11, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v8, v15, v8, vcc
+; CGP-NEXT:    v_sub_i32_e32 v9, vcc, v9, v6
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v11, v9, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v12, v10, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v7, v13, v14, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
+; CGP-NEXT:  BB2_6: ; %Flow
+; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
+; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execz BB2_8
+; CGP-NEXT:  ; %bb.7:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v6
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_lo_u32 v4, v3, v6
+; CGP-NEXT:    v_mul_hi_u32 v5, v3, v6
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v4
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
+; CGP-NEXT:    v_mul_hi_u32 v4, v4, v3
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CGP-NEXT:    v_mul_hi_u32 v3, v3, v2
+; CGP-NEXT:    v_mul_lo_u32 v4, v3, v6
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
+; CGP-NEXT:    v_subrev_i32_e32 v7, vcc, 1, v3
+; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v2, v4
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v6
+; CGP-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v2, v5, v3, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v7, v2, vcc
+; CGP-NEXT:    v_mov_b32_e32 v5, 0
+; CGP-NEXT:  BB2_8:
+; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
+; CGP-NEXT:    v_mov_b32_e32 v2, v4
+; CGP-NEXT:    v_mov_b32_e32 v3, v5
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %result = udiv <2 x i64> %num, %den
+  ret <2 x i64> %result
+}
+
+define i64 @v_udiv_i64_pow2k_denom(i64 %num) {
+; Checks the GlobalISel legalization of a 64-bit unsigned divide by the
+; power-of-two constant 4096 (0x1000). The expected output below is the full
+; 64-bit division expansion built around v_rcp_iflag_f32 -- no shift-based
+; strength reduction for the power-of-two denominator is visible here.
+; NOTE(review): these CHECK lines follow the update_llc_test_checks.py
+; autogenerated format -- regenerate with that script rather than hand-editing;
+; confirm against the file's header comment.
+; CHECK-LABEL: v_udiv_i64_pow2k_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_movk_i32 s6, 0x1000
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0
+; CHECK-NEXT:    s_mov_b32 s7, 0xfffff000
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s6
+; CHECK-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v2
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v3
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
+; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
+; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v3
+; CHECK-NEXT:    v_mul_lo_u32 v5, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v5
+; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v5
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v4
+; CHECK-NEXT:    v_mul_lo_u32 v9, v3, v4
+; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v3, v4
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v8
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], v3, v4, vcc
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v4
+; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, s7, v5
+; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v4
+; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
+; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v6
+; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v6
+; CHECK-NEXT:    v_mul_hi_u32 v11, v2, v6
+; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v6
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v9, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v8, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v9
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v4, v1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v5, v0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, v0, v3
+; CHECK-NEXT:    v_mul_lo_u32 v7, v1, v3
+; CHECK-NEXT:    v_mul_hi_u32 v8, v0, v3
+; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, 0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v2
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
+; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v3
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v2
+; CHECK-NEXT:    v_addc_u32_e32 v9, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, 1, v8
+; CHECK-NEXT:    v_addc_u32_e32 v10, vcc, 0, v9, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
+; CHECK-NEXT:    v_subb_u32_e64 v5, s[4:5], v1, v4, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v4
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
+; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s6, v0
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v8, v6, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v9, v10, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  ; Denominator 4096 = 0x1000 = 2^12 (matches "s_movk_i32 s6, 0x1000" above).
+  %result = udiv i64 %num, 4096
+  ret i64 %result
+}
+
+define <2 x i64> @v_udiv_v2i64_pow2k_denom(<2 x i64> %num) {
+; GISEL-LABEL: v_udiv_v2i64_pow2k_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_movk_i32 s10, 0x1000
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s10
+; GISEL-NEXT:    s_sub_u32 s8, 0, s10
+; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, 0
+; GISEL-NEXT:    v_mov_b32_e32 v6, v4
+; GISEL-NEXT:    s_and_b32 s4, s4, 1
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v6
+; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
+; GISEL-NEXT:    s_subb_u32 s9, 0, 0
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; GISEL-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
+; GISEL-NEXT:    s_sub_u32 s11, 0, s10
+; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
+; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
+; GISEL-NEXT:    v_trunc_f32_e32 v6, v6
+; GISEL-NEXT:    s_and_b32 s4, s4, 1
+; GISEL-NEXT:    v_trunc_f32_e32 v7, v7
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
+; GISEL-NEXT:    s_subb_u32 s6, 0, 0
+; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v7
+; GISEL-NEXT:    v_mul_lo_u32 v10, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
+; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v13, s8, v5
+; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
+; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
+; GISEL-NEXT:    v_mul_lo_u32 v11, v6, v10
+; GISEL-NEXT:    v_mul_hi_u32 v16, v4, v10
+; GISEL-NEXT:    v_mul_hi_u32 v10, v6, v10
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
+; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v13
+; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v13
+; GISEL-NEXT:    v_mul_hi_u32 v13, v7, v13
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
+; GISEL-NEXT:    v_mul_lo_u32 v15, v6, v8
+; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v6, v8
+; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v9
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v9
+; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
+; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
+; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
+; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v9
+; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v5
+; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
+; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
+; GISEL-NEXT:    v_mul_lo_u32 v16, s11, v10
+; GISEL-NEXT:    v_mul_lo_u32 v17, v10, v8
+; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
+; GISEL-NEXT:    v_mul_lo_u32 v19, s8, v13
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
+; GISEL-NEXT:    v_mul_lo_u32 v16, v13, v9
+; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v14, v19
+; GISEL-NEXT:    v_mul_hi_u32 v19, v5, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v9
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
+; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v14, v15
+; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v11
+; GISEL-NEXT:    v_mul_lo_u32 v15, v5, v12
+; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v19
+; GISEL-NEXT:    v_mul_lo_u32 v15, v10, v11
+; GISEL-NEXT:    v_mul_hi_u32 v19, v4, v11
+; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v11
+; GISEL-NEXT:    v_mul_lo_u32 v11, v13, v12
+; GISEL-NEXT:    v_mul_hi_u32 v13, v13, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v5, v12
+; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v17, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[8:9], v15, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[8:9], v11, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v18
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[8:9], v8, v19
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v17, v14
+; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v18
+; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v19
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v12
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v14
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v13, v11
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v7, vcc, v7, v11, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
+; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v5
+; GISEL-NEXT:    v_mul_hi_u32 v11, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
+; GISEL-NEXT:    v_mul_lo_u32 v12, v2, v6
+; GISEL-NEXT:    v_mul_lo_u32 v13, v3, v6
+; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
+; GISEL-NEXT:    v_mul_lo_u32 v15, v0, v7
+; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v7
+; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; GISEL-NEXT:    v_mul_lo_u32 v10, s10, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v4
+; GISEL-NEXT:    v_mul_hi_u32 v13, s10, v4
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_mul_lo_u32 v11, s10, v5
+; GISEL-NEXT:    v_mul_lo_u32 v14, 0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v15, s10, v5
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_mul_lo_u32 v8, s10, v6
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
+; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v6, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v17, s10, v7
+; GISEL-NEXT:    v_add_i32_e32 v18, vcc, 1, v5
+; GISEL-NEXT:    v_addc_u32_e32 v19, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v17
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, 1, v9
+; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, 0, v16, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v13
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v18
+; GISEL-NEXT:    v_addc_u32_e32 v15, vcc, 0, v19, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
+; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], v3, v8, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v8
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s10, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
+; GISEL-NEXT:    v_subb_u32_e64 v11, s[6:7], v1, v12, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v12
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[6:7]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v10
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[8:9], s10, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[8:9]
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v12, v8, s[6:7]
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
+; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, s10, v2
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v11
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
+; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, s10, v0
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v11, v2, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v12, v0, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v14, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v18, v13, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v16, v17, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v19, v15, s[4:5]
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_udiv_v2i64_pow2k_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_movk_i32 s10, 0x1000
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, s10
+; CGP-NEXT:    s_mov_b32 s8, 0xfffff000
+; CGP-NEXT:    v_mov_b32_e32 v6, v5
+; CGP-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v4
+; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v5
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v6
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; CGP-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
+; CGP-NEXT:    v_trunc_f32_e32 v6, v6
+; CGP-NEXT:    v_trunc_f32_e32 v7, v7
+; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
+; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT:    v_mul_lo_u32 v8, s8, v6
+; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT:    v_mul_lo_u32 v9, s8, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, s8, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
+; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
+; CGP-NEXT:    v_mul_lo_u32 v13, s8, v5
+; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
+; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
+; CGP-NEXT:    v_mul_lo_u32 v11, v6, v10
+; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v6, v10
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
+; CGP-NEXT:    v_mul_lo_u32 v14, v7, v13
+; CGP-NEXT:    v_mul_hi_u32 v17, v5, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v7, v13
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; CGP-NEXT:    v_mul_lo_u32 v12, v4, v8
+; CGP-NEXT:    v_mul_lo_u32 v15, v6, v8
+; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
+; CGP-NEXT:    v_mul_lo_u32 v19, v5, v9
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
+; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; CGP-NEXT:    v_mul_lo_u32 v14, v7, v9
+; CGP-NEXT:    v_mul_hi_u32 v17, v5, v9
+; CGP-NEXT:    v_mul_hi_u32 v9, v7, v9
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
+; CGP-NEXT:    v_mul_lo_u32 v8, s8, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
+; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
+; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v9
+; CGP-NEXT:    v_mul_lo_u32 v9, s8, v5
+; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
+; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
+; CGP-NEXT:    v_mul_lo_u32 v16, s8, v10
+; CGP-NEXT:    v_mul_lo_u32 v17, v10, v8
+; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v10, v8
+; CGP-NEXT:    v_mul_lo_u32 v19, s8, v13
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
+; CGP-NEXT:    v_mul_lo_u32 v16, v13, v9
+; CGP-NEXT:    v_add_i32_e64 v14, s[6:7], v14, v19
+; CGP-NEXT:    v_mul_hi_u32 v19, v5, v9
+; CGP-NEXT:    v_mul_hi_u32 v9, v13, v9
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
+; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v14, v15
+; CGP-NEXT:    v_mul_lo_u32 v14, v4, v11
+; CGP-NEXT:    v_mul_lo_u32 v15, v5, v12
+; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v19
+; CGP-NEXT:    v_mul_lo_u32 v15, v10, v11
+; CGP-NEXT:    v_mul_hi_u32 v19, v4, v11
+; CGP-NEXT:    v_mul_hi_u32 v10, v10, v11
+; CGP-NEXT:    v_mul_lo_u32 v11, v13, v12
+; CGP-NEXT:    v_mul_hi_u32 v13, v13, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v5, v12
+; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v17, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
+; CGP-NEXT:    v_add_i32_e64 v8, s[8:9], v15, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
+; CGP-NEXT:    v_add_i32_e64 v9, s[8:9], v11, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[8:9]
+; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
+; CGP-NEXT:    v_add_i32_e64 v8, s[8:9], v8, v19
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
+; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v14, s[6:7], v17, v14
+; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v18
+; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v19
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
+; CGP-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v12
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v14
+; CGP-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v13, v11
+; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
+; CGP-NEXT:    v_addc_u32_e64 v7, vcc, v7, v11, s[4:5]
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; CGP-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
+; CGP-NEXT:    v_mul_lo_u32 v8, v3, v4
+; CGP-NEXT:    v_mul_hi_u32 v10, v2, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
+; CGP-NEXT:    v_mul_lo_u32 v9, v1, v5
+; CGP-NEXT:    v_mul_hi_u32 v11, v0, v5
+; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
+; CGP-NEXT:    v_mul_lo_u32 v12, v2, v6
+; CGP-NEXT:    v_mul_lo_u32 v13, v3, v6
+; CGP-NEXT:    v_mul_hi_u32 v14, v2, v6
+; CGP-NEXT:    v_mul_hi_u32 v6, v3, v6
+; CGP-NEXT:    v_mul_lo_u32 v15, v0, v7
+; CGP-NEXT:    v_mul_lo_u32 v16, v1, v7
+; CGP-NEXT:    v_mul_hi_u32 v17, v0, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT:    v_mul_lo_u32 v10, s10, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, 0, v4
+; CGP-NEXT:    v_mul_hi_u32 v13, s10, v4
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT:    v_mul_lo_u32 v11, s10, v5
+; CGP-NEXT:    v_mul_lo_u32 v14, 0, v5
+; CGP-NEXT:    v_mul_hi_u32 v15, s10, v5
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT:    v_mul_lo_u32 v8, s10, v6
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
+; CGP-NEXT:    v_addc_u32_e32 v16, vcc, 0, v6, vcc
+; CGP-NEXT:    v_mul_lo_u32 v17, s10, v7
+; CGP-NEXT:    v_add_i32_e32 v18, vcc, 1, v5
+; CGP-NEXT:    v_addc_u32_e32 v19, vcc, 0, v7, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v17
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, 1, v9
+; CGP-NEXT:    v_addc_u32_e32 v17, vcc, 0, v16, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v13
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, 1, v18
+; CGP-NEXT:    v_addc_u32_e32 v15, vcc, 0, v19, vcc
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
+; CGP-NEXT:    v_subb_u32_e64 v10, s[4:5], v3, v8, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v8
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s10, v2
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
+; CGP-NEXT:    v_subb_u32_e64 v11, s[6:7], v1, v12, s[4:5]
+; CGP-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v12
+; CGP-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[6:7]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v10
+; CGP-NEXT:    v_cmp_le_u32_e64 s[8:9], s10, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[8:9]
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v8, v12, v8, s[6:7]
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; CGP-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
+; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, s10, v2
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s10, v2
+; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v11
+; CGP-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
+; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s10, v0
+; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s10, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v11, v2, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v12, v0, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v9, v14, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v18, v13, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v16, v17, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v19, v15, s[4:5]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %result = udiv <2 x i64> %num, <i64 4096, i64 4096>
+  ret <2 x i64> %result
+}
+
+define i64 @v_udiv_i64_oddk_denom(i64 %num) {
+; CHECK-LABEL: v_udiv_i64_oddk_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0
+; CHECK-NEXT:    s_mov_b32 s7, 0xffed2705
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s6
+; CHECK-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v2
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v3
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
+; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
+; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v3
+; CHECK-NEXT:    v_mul_lo_u32 v5, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v5
+; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v5
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v4
+; CHECK-NEXT:    v_mul_lo_u32 v9, v3, v4
+; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v3, v4
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v8
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], v3, v4, vcc
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v4
+; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, s7, v5
+; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v4
+; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
+; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v6
+; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v6
+; CHECK-NEXT:    v_mul_hi_u32 v11, v2, v6
+; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v6
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v9, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v8, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v9
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v4, v1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v5, v0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, v0, v3
+; CHECK-NEXT:    v_mul_lo_u32 v7, v1, v3
+; CHECK-NEXT:    v_mul_hi_u32 v8, v0, v3
+; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, 0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v2
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
+; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v3
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v2
+; CHECK-NEXT:    v_addc_u32_e32 v9, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, 1, v8
+; CHECK-NEXT:    v_addc_u32_e32 v10, vcc, 0, v9, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
+; CHECK-NEXT:    v_subb_u32_e64 v5, s[4:5], v1, v4, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v4
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
+; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s6, v0
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v8, v6, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v9, v10, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %result = udiv i64 %num, 1235195
+  ret i64 %result
+}
+
+define <2 x i64> @v_udiv_v2i64_oddk_denom(<2 x i64> %num) {
+; GISEL-LABEL: v_udiv_v2i64_oddk_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s10, 0x12d8fb
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s10
+; GISEL-NEXT:    s_sub_u32 s8, 0, s10
+; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, 0
+; GISEL-NEXT:    v_mov_b32_e32 v6, v4
+; GISEL-NEXT:    s_and_b32 s4, s4, 1
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v6
+; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
+; GISEL-NEXT:    s_subb_u32 s9, 0, 0
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; GISEL-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
+; GISEL-NEXT:    s_sub_u32 s11, 0, s10
+; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
+; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
+; GISEL-NEXT:    v_trunc_f32_e32 v6, v6
+; GISEL-NEXT:    s_and_b32 s4, s4, 1
+; GISEL-NEXT:    v_trunc_f32_e32 v7, v7
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
+; GISEL-NEXT:    s_subb_u32 s6, 0, 0
+; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v7
+; GISEL-NEXT:    v_mul_lo_u32 v10, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
+; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v13, s8, v5
+; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
+; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
+; GISEL-NEXT:    v_mul_lo_u32 v11, v6, v10
+; GISEL-NEXT:    v_mul_hi_u32 v16, v4, v10
+; GISEL-NEXT:    v_mul_hi_u32 v10, v6, v10
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
+; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v13
+; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v13
+; GISEL-NEXT:    v_mul_hi_u32 v13, v7, v13
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
+; GISEL-NEXT:    v_mul_lo_u32 v15, v6, v8
+; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v6, v8
+; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v9
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v9
+; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
+; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
+; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
+; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v9
+; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v5
+; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
+; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
+; GISEL-NEXT:    v_mul_lo_u32 v16, s11, v10
+; GISEL-NEXT:    v_mul_lo_u32 v17, v10, v8
+; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
+; GISEL-NEXT:    v_mul_lo_u32 v19, s8, v13
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
+; GISEL-NEXT:    v_mul_lo_u32 v16, v13, v9
+; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v14, v19
+; GISEL-NEXT:    v_mul_hi_u32 v19, v5, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v9
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
+; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v14, v15
+; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v11
+; GISEL-NEXT:    v_mul_lo_u32 v15, v5, v12
+; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v19
+; GISEL-NEXT:    v_mul_lo_u32 v15, v10, v11
+; GISEL-NEXT:    v_mul_hi_u32 v19, v4, v11
+; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v11
+; GISEL-NEXT:    v_mul_lo_u32 v11, v13, v12
+; GISEL-NEXT:    v_mul_hi_u32 v13, v13, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v5, v12
+; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v17, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[8:9], v15, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[8:9], v11, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v18
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[8:9], v8, v19
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v17, v14
+; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v18
+; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v19
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v12
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v14
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v13, v11
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v7, vcc, v7, v11, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
+; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v5
+; GISEL-NEXT:    v_mul_hi_u32 v11, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
+; GISEL-NEXT:    v_mul_lo_u32 v12, v2, v6
+; GISEL-NEXT:    v_mul_lo_u32 v13, v3, v6
+; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
+; GISEL-NEXT:    v_mul_lo_u32 v15, v0, v7
+; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v7
+; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; GISEL-NEXT:    v_mul_lo_u32 v10, s10, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v4
+; GISEL-NEXT:    v_mul_hi_u32 v13, s10, v4
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_mul_lo_u32 v11, s10, v5
+; GISEL-NEXT:    v_mul_lo_u32 v14, 0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v15, s10, v5
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_mul_lo_u32 v8, s10, v6
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
+; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v6, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v17, s10, v7
+; GISEL-NEXT:    v_add_i32_e32 v18, vcc, 1, v5
+; GISEL-NEXT:    v_addc_u32_e32 v19, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v17
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, 1, v9
+; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, 0, v16, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v13
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v18
+; GISEL-NEXT:    v_addc_u32_e32 v15, vcc, 0, v19, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
+; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], v3, v8, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v8
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s10, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
+; GISEL-NEXT:    v_subb_u32_e64 v11, s[6:7], v1, v12, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v12
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[6:7]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v10
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[8:9], s10, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[8:9]
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v12, v8, s[6:7]
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
+; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, s10, v2
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v11
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
+; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, s10, v0
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v11, v2, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v12, v0, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v14, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v18, v13, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v16, v17, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v19, v15, s[4:5]
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_udiv_v2i64_oddk_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_mov_b32 s10, 0x12d8fb
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, s10
+; CGP-NEXT:    s_mov_b32 s8, 0xffed2705
+; CGP-NEXT:    v_mov_b32_e32 v6, v5
+; CGP-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v4
+; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v5
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v6
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; CGP-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
+; CGP-NEXT:    v_trunc_f32_e32 v6, v6
+; CGP-NEXT:    v_trunc_f32_e32 v7, v7
+; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
+; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT:    v_mul_lo_u32 v8, s8, v6
+; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT:    v_mul_lo_u32 v9, s8, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, s8, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
+; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
+; CGP-NEXT:    v_mul_lo_u32 v13, s8, v5
+; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
+; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
+; CGP-NEXT:    v_mul_lo_u32 v11, v6, v10
+; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v6, v10
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
+; CGP-NEXT:    v_mul_lo_u32 v14, v7, v13
+; CGP-NEXT:    v_mul_hi_u32 v17, v5, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v7, v13
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; CGP-NEXT:    v_mul_lo_u32 v12, v4, v8
+; CGP-NEXT:    v_mul_lo_u32 v15, v6, v8
+; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
+; CGP-NEXT:    v_mul_lo_u32 v19, v5, v9
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
+; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; CGP-NEXT:    v_mul_lo_u32 v14, v7, v9
+; CGP-NEXT:    v_mul_hi_u32 v17, v5, v9
+; CGP-NEXT:    v_mul_hi_u32 v9, v7, v9
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
+; CGP-NEXT:    v_mul_lo_u32 v8, s8, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
+; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
+; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v9
+; CGP-NEXT:    v_mul_lo_u32 v9, s8, v5
+; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
+; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
+; CGP-NEXT:    v_mul_lo_u32 v16, s8, v10
+; CGP-NEXT:    v_mul_lo_u32 v17, v10, v8
+; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v10, v8
+; CGP-NEXT:    v_mul_lo_u32 v19, s8, v13
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
+; CGP-NEXT:    v_mul_lo_u32 v16, v13, v9
+; CGP-NEXT:    v_add_i32_e64 v14, s[6:7], v14, v19
+; CGP-NEXT:    v_mul_hi_u32 v19, v5, v9
+; CGP-NEXT:    v_mul_hi_u32 v9, v13, v9
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
+; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v14, v15
+; CGP-NEXT:    v_mul_lo_u32 v14, v4, v11
+; CGP-NEXT:    v_mul_lo_u32 v15, v5, v12
+; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v19
+; CGP-NEXT:    v_mul_lo_u32 v15, v10, v11
+; CGP-NEXT:    v_mul_hi_u32 v19, v4, v11
+; CGP-NEXT:    v_mul_hi_u32 v10, v10, v11
+; CGP-NEXT:    v_mul_lo_u32 v11, v13, v12
+; CGP-NEXT:    v_mul_hi_u32 v13, v13, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v5, v12
+; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v17, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
+; CGP-NEXT:    v_add_i32_e64 v8, s[8:9], v15, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
+; CGP-NEXT:    v_add_i32_e64 v9, s[8:9], v11, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[8:9]
+; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
+; CGP-NEXT:    v_add_i32_e64 v8, s[8:9], v8, v19
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
+; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v14, s[6:7], v17, v14
+; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v18
+; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v19
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
+; CGP-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v12
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v14
+; CGP-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v13, v11
+; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
+; CGP-NEXT:    v_addc_u32_e64 v7, vcc, v7, v11, s[4:5]
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; CGP-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
+; CGP-NEXT:    v_mul_lo_u32 v8, v3, v4
+; CGP-NEXT:    v_mul_hi_u32 v10, v2, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
+; CGP-NEXT:    v_mul_lo_u32 v9, v1, v5
+; CGP-NEXT:    v_mul_hi_u32 v11, v0, v5
+; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
+; CGP-NEXT:    v_mul_lo_u32 v12, v2, v6
+; CGP-NEXT:    v_mul_lo_u32 v13, v3, v6
+; CGP-NEXT:    v_mul_hi_u32 v14, v2, v6
+; CGP-NEXT:    v_mul_hi_u32 v6, v3, v6
+; CGP-NEXT:    v_mul_lo_u32 v15, v0, v7
+; CGP-NEXT:    v_mul_lo_u32 v16, v1, v7
+; CGP-NEXT:    v_mul_hi_u32 v17, v0, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT:    v_mul_lo_u32 v10, s10, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, 0, v4
+; CGP-NEXT:    v_mul_hi_u32 v13, s10, v4
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT:    v_mul_lo_u32 v11, s10, v5
+; CGP-NEXT:    v_mul_lo_u32 v14, 0, v5
+; CGP-NEXT:    v_mul_hi_u32 v15, s10, v5
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT:    v_mul_lo_u32 v8, s10, v6
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
+; CGP-NEXT:    v_addc_u32_e32 v16, vcc, 0, v6, vcc
+; CGP-NEXT:    v_mul_lo_u32 v17, s10, v7
+; CGP-NEXT:    v_add_i32_e32 v18, vcc, 1, v5
+; CGP-NEXT:    v_addc_u32_e32 v19, vcc, 0, v7, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v17
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, 1, v9
+; CGP-NEXT:    v_addc_u32_e32 v17, vcc, 0, v16, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v13
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, 1, v18
+; CGP-NEXT:    v_addc_u32_e32 v15, vcc, 0, v19, vcc
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
+; CGP-NEXT:    v_subb_u32_e64 v10, s[4:5], v3, v8, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v8
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s10, v2
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
+; CGP-NEXT:    v_subb_u32_e64 v11, s[6:7], v1, v12, s[4:5]
+; CGP-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v12
+; CGP-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[6:7]
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v10
+; CGP-NEXT:    v_cmp_le_u32_e64 s[8:9], s10, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[8:9]
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v8, v12, v8, s[6:7]
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; CGP-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
+; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, s10, v2
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s10, v2
+; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v11
+; CGP-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
+; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s10, v0
+; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s10, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v11, v2, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v12, v0, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v9, v14, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v18, v13, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v16, v17, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v19, v15, s[4:5]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %result = udiv <2 x i64> %num, <i64 1235195, i64 1235195>
+  ret <2 x i64> %result
+}
+
+define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
+; CHECK-LABEL: v_udiv_i64_pow2_shl_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_movk_i32 s4, 0x1000
+; CHECK-NEXT:    s_mov_b32 s5, 0
+; CHECK-NEXT:    v_mov_b32_e32 v6, 0
+; CHECK-NEXT:    v_lshl_b64 v[4:5], s[4:5], v2
+; CHECK-NEXT:    v_or_b32_e32 v7, v1, v5
+; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[6:7]
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CHECK-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CHECK-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CHECK-NEXT:    s_cbranch_execnz BB7_2
+; CHECK-NEXT:  ; %bb.1:
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v4
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v5
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
+; CHECK-NEXT:    v_subb_u32_e32 v7, vcc, 0, v5, vcc
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
+; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
+; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v3
+; CHECK-NEXT:    v_mul_lo_u32 v9, v6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v2
+; CHECK-NEXT:    v_mul_hi_u32 v11, v6, v2
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; CHECK-NEXT:    v_mul_lo_u32 v10, v3, v9
+; CHECK-NEXT:    v_mul_hi_u32 v12, v2, v9
+; CHECK-NEXT:    v_mul_hi_u32 v9, v3, v9
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; CHECK-NEXT:    v_mul_lo_u32 v11, v2, v8
+; CHECK-NEXT:    v_mul_lo_u32 v13, v3, v8
+; CHECK-NEXT:    v_mul_hi_u32 v14, v2, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v3, v8
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
+; CHECK-NEXT:    v_addc_u32_e64 v9, s[4:5], v3, v8, vcc
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v8
+; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v2
+; CHECK-NEXT:    v_mul_hi_u32 v10, v6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, v6, v9
+; CHECK-NEXT:    v_mul_lo_u32 v11, v9, v8
+; CHECK-NEXT:    v_mul_hi_u32 v12, v2, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v9, v8
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
+; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v6
+; CHECK-NEXT:    v_mul_lo_u32 v10, v9, v6
+; CHECK-NEXT:    v_mul_hi_u32 v13, v2, v6
+; CHECK-NEXT:    v_mul_hi_u32 v6, v9, v6
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v10, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v7
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v11
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v6, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v6, v1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, v0, v3
+; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v3
+; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v3
+; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v9, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_mul_lo_u32 v7, v4, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v2
+; CHECK-NEXT:    v_mul_hi_u32 v9, v4, v2
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
+; CHECK-NEXT:    v_mul_lo_u32 v6, v4, v3
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, 1, v2
+; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v10
+; CHECK-NEXT:    v_addc_u32_e32 v12, vcc, 0, v11, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
+; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v0, v7
+; CHECK-NEXT:    v_subb_u32_e64 v9, s[4:5], v1, v6, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v6
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v6, v13, v6, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v7, v4
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v9, v7, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v10, v8, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v5, v11, v12, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v2, v1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CHECK-NEXT:  BB7_2: ; %Flow
+; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
+; CHECK-NEXT:    s_xor_b64 exec, exec, s[6:7]
+; CHECK-NEXT:    s_cbranch_execz BB7_4
+; CHECK-NEXT:  ; %bb.3:
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v4
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v1
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_lo_u32 v2, v1, v4
+; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v4
+; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v2, v2, v1
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v1, v2
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT:    v_mul_lo_u32 v2, v1, v4
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
+; CHECK-NEXT:    v_subrev_i32_e32 v5, vcc, 1, v1
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v4
+; CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v3, v1, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v5, v0, vcc
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0
+; CHECK-NEXT:  BB7_4:
+; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
+; CHECK-NEXT:    v_mov_b32_e32 v0, v2
+; CHECK-NEXT:    v_mov_b32_e32 v1, v3
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %shl.y = shl i64 4096, %y
+  %r = udiv i64 %x, %shl.y
+  ret i64 %r
+}
+
+define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
+; GISEL-LABEL: v_udiv_v2i64_pow2_shl_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_movk_i32 s4, 0x1000
+; GISEL-NEXT:    s_mov_b32 s5, 0
+; GISEL-NEXT:    v_lshl_b64 v[4:5], s[4:5], v4
+; GISEL-NEXT:    v_lshl_b64 v[6:7], s[4:5], v6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v4
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v5
+; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v9
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
+; GISEL-NEXT:    v_mul_f32_e32 v8, 0x5f7ffffc, v8
+; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v8
+; GISEL-NEXT:    v_trunc_f32_e32 v9, v9
+; GISEL-NEXT:    v_mac_f32_e32 v8, 0xcf800000, v9
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
+; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v4
+; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
+; GISEL-NEXT:    v_mul_lo_u32 v13, v11, v8
+; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v9
+; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v8
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; GISEL-NEXT:    v_mul_lo_u32 v14, v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v13
+; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v12
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; GISEL-NEXT:    v_mul_lo_u32 v15, v9, v13
+; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
+; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v13
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v13
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], v9, v13, vcc
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
+; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v8
+; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v8
+; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v12
+; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v8
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
+; GISEL-NEXT:    v_mul_lo_u32 v11, v12, v13
+; GISEL-NEXT:    v_mul_lo_u32 v14, v8, v10
+; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v13
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v14, v11
+; GISEL-NEXT:    v_mul_lo_u32 v14, v12, v10
+; GISEL-NEXT:    v_mul_hi_u32 v13, v12, v13
+; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v10
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v15
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v10
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v10, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, 0, v9, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v8
+; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v9
+; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v8
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v9
+; GISEL-NEXT:    v_mul_hi_u32 v8, v1, v8
+; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v9
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v8
+; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v8
+; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v8
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
+; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], v1, v11, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v11
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v10, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, v11, v12, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v4
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
+; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v8
+; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, 0, v9, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v13, v0, vcc
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v11
+; GISEL-NEXT:    v_addc_u32_e32 v4, vcc, 0, v12, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v11, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v12, v4, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v7
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, 0, v6
+; GISEL-NEXT:    v_subb_u32_e32 v9, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v5
+; GISEL-NEXT:    v_mul_hi_u32 v13, v8, v4
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
+; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v10
+; GISEL-NEXT:    v_mul_lo_u32 v13, v4, v11
+; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v10
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; GISEL-NEXT:    v_mul_lo_u32 v13, v5, v11
+; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v10
+; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v11
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v11
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v5, v11, vcc
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v11
+; GISEL-NEXT:    v_mul_lo_u32 v11, v8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v10
+; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v4
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
+; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v11
+; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v11
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
+; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
+; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v11
+; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v8
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v9
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v5, vcc, 0, v5, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v5
+; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v5
+; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, v7, v4
+; GISEL-NEXT:    v_mul_lo_u32 v10, v6, v5
+; GISEL-NEXT:    v_mul_hi_u32 v11, v6, v4
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
+; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], v3, v9, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v9
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v8, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v9, v10, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v6
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
+; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v11, v2, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v9
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v10, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v9, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v10, v6, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_udiv_v2i64_pow2_shl_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    v_mov_b32_e32 v5, v0
+; CGP-NEXT:    v_mov_b32_e32 v7, v1
+; CGP-NEXT:    s_movk_i32 s4, 0x1000
+; CGP-NEXT:    s_mov_b32 s5, 0
+; CGP-NEXT:    v_mov_b32_e32 v0, 0
+; CGP-NEXT:    v_lshl_b64 v[10:11], s[4:5], v4
+; CGP-NEXT:    v_lshl_b64 v[8:9], s[4:5], v6
+; CGP-NEXT:    v_or_b32_e32 v1, v7, v11
+; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execnz BB8_2
+; CGP-NEXT:  ; %bb.1:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v10
+; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v11
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v10
+; CGP-NEXT:    v_subb_u32_e32 v6, vcc, 0, v11, vcc
+; CGP-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
+; CGP-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
+; CGP-NEXT:    v_trunc_f32_e32 v1, v1
+; CGP-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; CGP-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v4, v1
+; CGP-NEXT:    v_mul_lo_u32 v13, v4, v0
+; CGP-NEXT:    v_mul_lo_u32 v14, v6, v0
+; CGP-NEXT:    v_mul_hi_u32 v15, v4, v0
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
+; CGP-NEXT:    v_mul_lo_u32 v14, v1, v13
+; CGP-NEXT:    v_mul_hi_u32 v16, v0, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v1, v13
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; CGP-NEXT:    v_mul_lo_u32 v15, v0, v12
+; CGP-NEXT:    v_mul_lo_u32 v17, v1, v12
+; CGP-NEXT:    v_mul_hi_u32 v18, v0, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v1, v12
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v17, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v17, v16
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
+; CGP-NEXT:    v_addc_u32_e64 v13, s[4:5], v1, v12, vcc
+; CGP-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v12
+; CGP-NEXT:    v_mul_lo_u32 v12, v4, v0
+; CGP-NEXT:    v_mul_lo_u32 v6, v6, v0
+; CGP-NEXT:    v_mul_hi_u32 v14, v4, v0
+; CGP-NEXT:    v_mul_lo_u32 v4, v4, v13
+; CGP-NEXT:    v_mul_lo_u32 v15, v13, v12
+; CGP-NEXT:    v_mul_hi_u32 v16, v0, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v13, v12
+; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v6, v4
+; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v14
+; CGP-NEXT:    v_mul_lo_u32 v6, v0, v4
+; CGP-NEXT:    v_mul_lo_u32 v14, v13, v4
+; CGP-NEXT:    v_mul_hi_u32 v17, v0, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v13, v4
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v15, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v13, v6
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v15
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v12, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
+; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v12
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v4, v7, v0
+; CGP-NEXT:    v_mul_hi_u32 v6, v5, v0
+; CGP-NEXT:    v_mul_hi_u32 v0, v7, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v5, v1
+; CGP-NEXT:    v_mul_lo_u32 v13, v7, v1
+; CGP-NEXT:    v_mul_hi_u32 v14, v5, v1
+; CGP-NEXT:    v_mul_hi_u32 v1, v7, v1
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v13, v6
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CGP-NEXT:    v_mul_lo_u32 v6, v10, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v11, v0
+; CGP-NEXT:    v_mul_hi_u32 v13, v10, v0
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT:    v_mul_lo_u32 v4, v10, v1
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, 1, v0
+; CGP-NEXT:    v_addc_u32_e32 v15, vcc, 0, v1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, 1, v14
+; CGP-NEXT:    v_addc_u32_e32 v16, vcc, 0, v15, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v13
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v5, v6
+; CGP-NEXT:    v_subb_u32_e64 v13, s[4:5], v7, v4, vcc
+; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v7, v4
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v13, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, -1, s[4:5]
+; CGP-NEXT:    v_subb_u32_e32 v4, vcc, v4, v11, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v13, v11
+; CGP-NEXT:    v_cndmask_b32_e32 v7, v17, v7, vcc
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v6, v10
+; CGP-NEXT:    v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v6, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v11
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v13, v6, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v14, v12, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v6, v15, v16, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; CGP-NEXT:  BB8_2: ; %Flow1
+; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
+; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execz BB8_4
+; CGP-NEXT:  ; %bb.3:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v10
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
+; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_lo_u32 v1, v0, v10
+; CGP-NEXT:    v_mul_hi_u32 v4, v0, v10
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, 0, v1
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; CGP-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v0, v1
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; CGP-NEXT:    v_mul_hi_u32 v0, v0, v5
+; CGP-NEXT:    v_mul_lo_u32 v1, v0, v10
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
+; CGP-NEXT:    v_subrev_i32_e32 v6, vcc, 1, v0
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v5, v1
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v1
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v10
+; CGP-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v4, v0, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; CGP-NEXT:    v_mov_b32_e32 v1, 0
+; CGP-NEXT:  BB8_4:
+; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
+; CGP-NEXT:    v_or_b32_e32 v5, v3, v9
+; CGP-NEXT:    v_mov_b32_e32 v4, 0
+; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execnz BB8_6
+; CGP-NEXT:  ; %bb.5:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v8
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v9
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, 0, v8
+; CGP-NEXT:    v_subb_u32_e32 v7, vcc, 0, v9, vcc
+; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; CGP-NEXT:    v_trunc_f32_e32 v5, v5
+; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, v6, v5
+; CGP-NEXT:    v_mul_lo_u32 v11, v6, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, v7, v4
+; CGP-NEXT:    v_mul_hi_u32 v13, v6, v4
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; CGP-NEXT:    v_mul_lo_u32 v12, v5, v11
+; CGP-NEXT:    v_mul_hi_u32 v14, v4, v11
+; CGP-NEXT:    v_mul_hi_u32 v11, v5, v11
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
+; CGP-NEXT:    v_mul_lo_u32 v13, v4, v10
+; CGP-NEXT:    v_mul_lo_u32 v15, v5, v10
+; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v5, v10
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v15, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v14
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
+; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], v5, v10, vcc
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v10
+; CGP-NEXT:    v_mul_lo_u32 v10, v6, v4
+; CGP-NEXT:    v_mul_lo_u32 v7, v7, v4
+; CGP-NEXT:    v_mul_hi_u32 v12, v6, v4
+; CGP-NEXT:    v_mul_lo_u32 v6, v6, v11
+; CGP-NEXT:    v_mul_lo_u32 v13, v11, v10
+; CGP-NEXT:    v_mul_hi_u32 v14, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v11, v10
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v12
+; CGP-NEXT:    v_mul_lo_u32 v7, v4, v6
+; CGP-NEXT:    v_mul_lo_u32 v12, v11, v6
+; CGP-NEXT:    v_mul_hi_u32 v15, v4, v6
+; CGP-NEXT:    v_mul_hi_u32 v6, v11, v6
+; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v13, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v13
+; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v10, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v6, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; CGP-NEXT:    v_mul_lo_u32 v6, v3, v4
+; CGP-NEXT:    v_mul_hi_u32 v7, v2, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, v2, v5
+; CGP-NEXT:    v_mul_lo_u32 v11, v3, v5
+; CGP-NEXT:    v_mul_hi_u32 v12, v2, v5
+; CGP-NEXT:    v_mul_hi_u32 v5, v3, v5
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CGP-NEXT:    v_mul_lo_u32 v7, v8, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, v9, v4
+; CGP-NEXT:    v_mul_hi_u32 v11, v8, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CGP-NEXT:    v_mul_lo_u32 v6, v8, v5
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, 1, v4
+; CGP-NEXT:    v_addc_u32_e32 v13, vcc, 0, v5, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v12
+; CGP-NEXT:    v_addc_u32_e32 v14, vcc, 0, v13, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v2, v7
+; CGP-NEXT:    v_subb_u32_e64 v11, s[4:5], v3, v6, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v6
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s[4:5]
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v11, v9
+; CGP-NEXT:    v_cndmask_b32_e32 v6, v15, v6, vcc
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v7, v8
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v9
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v11, v7, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v12, v10, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v7, v13, v14, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v3, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
+; CGP-NEXT:  BB8_6: ; %Flow
+; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
+; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execz BB8_8
+; CGP-NEXT:  ; %bb.7:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v8
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_lo_u32 v4, v3, v8
+; CGP-NEXT:    v_mul_hi_u32 v5, v3, v8
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; CGP-NEXT:    v_mul_hi_u32 v4, v4, v3
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CGP-NEXT:    v_mul_hi_u32 v3, v3, v2
+; CGP-NEXT:    v_mul_lo_u32 v4, v3, v8
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
+; CGP-NEXT:    v_subrev_i32_e32 v6, vcc, 1, v3
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v2, v4
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v8
+; CGP-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v2, v5, v3, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v6, v2, vcc
+; CGP-NEXT:    v_mov_b32_e32 v5, 0
+; CGP-NEXT:  BB8_8:
+; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
+; CGP-NEXT:    v_mov_b32_e32 v2, v4
+; CGP-NEXT:    v_mov_b32_e32 v3, v5
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
+  %r = udiv <2 x i64> %x, %shl.y
+  ret <2 x i64> %r
+}
+
+; Unsigned 64-bit division where both operands are pre-masked to 24 bits,
+; so the true computation fits in 32 bits. The GISEL path still emits the
+; generic 32-bit expansion built around v_rcp_iflag_f32 with a Newton-style
+; correction and quotient fix-up; the CGP (DAG) path instead converts both
+; operands to f32 and divides directly with v_rcp_f32, then masks the
+; quotient back to 24 bits. Both paths zero the high result half (v1 = 0).
+; NOTE: the CHECK lines below are autogenerated (update_llc_test_checks.py)
+; and must not be hand-edited.
+define i64 @v_udiv_i64_24bit(i64 %num, i64 %den) {
+; GISEL-LABEL: v_udiv_i64_24bit:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
+; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
+; GISEL-NEXT:    v_and_b32_e32 v1, s4, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v2
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GISEL-NEXT:    v_mul_lo_u32 v3, v2, v1
+; GISEL-NEXT:    v_mul_hi_u32 v4, v2, v1
+; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v3
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v3, v3, v2
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v2, v3
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v2, v2, v0
+; GISEL-NEXT:    v_mul_lo_u32 v3, v2, v1
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
+; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, 1, v2
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v0, v3
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v1
+; GISEL-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v4, v2, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_udiv_i64_24bit:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_mov_b32 s4, 0xffffff
+; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
+; CGP-NEXT:    v_and_b32_e32 v1, s4, v2
+; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v1
+; CGP-NEXT:    v_rcp_f32_e32 v2, v1
+; CGP-NEXT:    v_mul_f32_e32 v2, v0, v2
+; CGP-NEXT:    v_trunc_f32_e32 v2, v2
+; CGP-NEXT:    v_mad_f32 v0, -v2, v1, v0
+; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v0|, v1
+; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; CGP-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
+; CGP-NEXT:    v_mov_b32_e32 v1, 0
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %num.mask = and i64 %num, 16777215    ; 0xffffff: keep low 24 bits of numerator
+  %den.mask = and i64 %den, 16777215    ; 0xffffff: keep low 24 bits of denominator
+  %result = udiv i64 %num.mask, %den.mask
+  ret i64 %result
+}
+
+define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
+; GISEL-LABEL: v_udiv_v2i64_24bit:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s6, 0xffffff
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v1, 0
+; GISEL-NEXT:    v_and_b32_e32 v3, s6, v4
+; GISEL-NEXT:    v_and_b32_e32 v4, s6, v6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v3
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, 0, v3
+; GISEL-NEXT:    v_subb_u32_e64 v7, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v4
+; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v4
+; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v1
+; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v1
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v1, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v8
+; GISEL-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; GISEL-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v1
+; GISEL-NEXT:    v_mul_f32_e32 v11, 0x2f800000, v5
+; GISEL-NEXT:    v_trunc_f32_e32 v8, v8
+; GISEL-NEXT:    v_trunc_f32_e32 v11, v11
+; GISEL-NEXT:    v_mac_f32_e32 v1, 0xcf800000, v8
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v11
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v11, v11
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GISEL-NEXT:    v_mul_lo_u32 v12, v6, v8
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v11
+; GISEL-NEXT:    v_mul_lo_u32 v14, v6, v1
+; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v1
+; GISEL-NEXT:    v_mul_hi_u32 v16, v6, v1
+; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v5
+; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v5
+; GISEL-NEXT:    v_mul_hi_u32 v19, v9, v5
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v18, v13
+; GISEL-NEXT:    v_mul_lo_u32 v15, v11, v17
+; GISEL-NEXT:    v_mul_hi_u32 v18, v5, v17
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v19
+; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v13
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v19
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v18
+; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v14
+; GISEL-NEXT:    v_mul_hi_u32 v18, v1, v14
+; GISEL-NEXT:    v_mul_hi_u32 v14, v8, v14
+; GISEL-NEXT:    v_mul_hi_u32 v17, v11, v17
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v16
+; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v12
+; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v18
+; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v18
+; GISEL-NEXT:    v_mul_hi_u32 v18, v1, v12
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v15, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v18
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v18
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v19, v18
+; GISEL-NEXT:    v_mul_lo_u32 v19, v11, v13
+; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v19, v17
+; GISEL-NEXT:    v_mul_hi_u32 v19, v5, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v19
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v20, v19
+; GISEL-NEXT:    v_and_b32_e32 v0, s6, v0
+; GISEL-NEXT:    v_and_b32_e32 v2, s6, v2
+; GISEL-NEXT:    v_mul_hi_u32 v12, v8, v12
+; GISEL-NEXT:    v_mul_hi_u32 v13, v11, v13
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v18
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v18
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v14
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[4:5], v8, v12, vcc
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v12
+; GISEL-NEXT:    v_mul_lo_u32 v12, v6, v1
+; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v1
+; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v1
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v17
+; GISEL-NEXT:    v_addc_u32_e64 v16, s[6:7], v11, v13, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v13
+; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v5
+; GISEL-NEXT:    v_mul_lo_u32 v10, v10, v5
+; GISEL-NEXT:    v_mul_hi_u32 v17, v9, v5
+; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v14
+; GISEL-NEXT:    v_mul_lo_u32 v18, v14, v12
+; GISEL-NEXT:    v_mul_hi_u32 v19, v1, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v14, v12
+; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v16
+; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v7, v6
+; GISEL-NEXT:    v_mul_lo_u32 v7, v16, v13
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v10, v9
+; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v13
+; GISEL-NEXT:    v_mul_hi_u32 v13, v16, v13
+; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v15
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v17
+; GISEL-NEXT:    v_mul_lo_u32 v15, v1, v6
+; GISEL-NEXT:    v_mul_lo_u32 v17, v5, v9
+; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v10
+; GISEL-NEXT:    v_mul_lo_u32 v7, v14, v6
+; GISEL-NEXT:    v_mul_hi_u32 v10, v1, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v14, v6
+; GISEL-NEXT:    v_mul_lo_u32 v14, v16, v9
+; GISEL-NEXT:    v_mul_hi_u32 v16, v16, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
+; GISEL-NEXT:    v_add_i32_e64 v15, s[8:9], v18, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v7, s[8:9], v7, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[8:9], v14, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[8:9], v15, v19
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v7, s[8:9], v7, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v13, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v18, v15
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v12, v10
+; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v17, v19
+; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], v14, v13
+; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v14
+; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v13, v12
+; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v10
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v16, v12
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v8, v6, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v8, vcc, v11, v10, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v7
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, 0, v1
+; GISEL-NEXT:    v_mul_hi_u32 v10, v0, v1
+; GISEL-NEXT:    v_mul_hi_u32 v1, 0, v1
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, 0, v8, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, 0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v11, v2, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, 0, v5
+; GISEL-NEXT:    v_mul_lo_u32 v12, v0, v6
+; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v6
+; GISEL-NEXT:    v_mul_hi_u32 v14, v0, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, 0, v6
+; GISEL-NEXT:    v_mul_lo_u32 v15, v2, v8
+; GISEL-NEXT:    v_mul_lo_u32 v16, 0, v8
+; GISEL-NEXT:    v_mul_hi_u32 v17, v2, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, 0, v8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v12, v7
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
+; GISEL-NEXT:    v_mul_lo_u32 v10, v3, v1
+; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v1
+; GISEL-NEXT:    v_mul_hi_u32 v13, v3, v1
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_mul_lo_u32 v11, v4, v5
+; GISEL-NEXT:    v_mul_lo_u32 v14, 0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v15, v4, v5
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
+; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v6
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v1
+; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v6, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v17, v4, v7
+; GISEL-NEXT:    v_add_i32_e32 v18, vcc, 1, v5
+; GISEL-NEXT:    v_addc_u32_e32 v19, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v17
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v13
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
+; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], 0, v8, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v11
+; GISEL-NEXT:    v_subb_u32_e64 v11, s[6:7], 0, v12, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v2, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[6:7]
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s[6:7]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v10
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[8:9], 0, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[8:9]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[8:9], 0, v11
+; GISEL-NEXT:    v_add_i32_e64 v11, s[10:11], 1, v9
+; GISEL-NEXT:    v_addc_u32_e64 v17, s[10:11], 0, v16, s[10:11]
+; GISEL-NEXT:    v_sub_i32_e64 v8, s[10:11], 0, v8
+; GISEL-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v8, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
+; GISEL-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v8, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
+; GISEL-NEXT:    v_add_i32_e64 v0, s[10:11], 1, v18
+; GISEL-NEXT:    v_addc_u32_e64 v3, s[10:11], 0, v19, s[10:11]
+; GISEL-NEXT:    v_sub_i32_e64 v12, s[10:11], 0, v12
+; GISEL-NEXT:    v_subbrev_u32_e64 v12, s[4:5], 0, v12, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, v15, v13, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, v10, v14, s[8:9]
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
+; GISEL-NEXT:    v_subbrev_u32_e32 v12, vcc, 0, v12, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v15, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v12
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v14, v2, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v9, v11, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, v18, v0, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, v16, v17, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v19, v3, s[4:5]
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, v5, v2, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v6, v8, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s[4:5]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_udiv_v2i64_24bit:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_mov_b32 s6, 0xffffff
+; CGP-NEXT:    v_mov_b32_e32 v1, 0
+; CGP-NEXT:    v_and_b32_e32 v0, s6, v0
+; CGP-NEXT:    v_and_b32_e32 v2, s6, v2
+; CGP-NEXT:    v_and_b32_e32 v3, s6, v4
+; CGP-NEXT:    v_and_b32_e32 v4, s6, v6
+; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v3
+; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v2
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v4
+; CGP-NEXT:    v_rcp_f32_e32 v5, v3
+; CGP-NEXT:    v_rcp_f32_e32 v6, v4
+; CGP-NEXT:    v_mul_f32_e32 v5, v0, v5
+; CGP-NEXT:    v_mul_f32_e32 v6, v2, v6
+; CGP-NEXT:    v_trunc_f32_e32 v5, v5
+; CGP-NEXT:    v_trunc_f32_e32 v6, v6
+; CGP-NEXT:    v_mad_f32 v0, -v5, v3, v0
+; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT:    v_mad_f32 v2, -v6, v4, v2
+; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v0|, v3
+; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v2|, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
+; CGP-NEXT:    v_and_b32_e32 v0, s6, v0
+; CGP-NEXT:    v_and_b32_e32 v2, s6, v2
+; CGP-NEXT:    v_mov_b32_e32 v3, v1
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>
+  %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215>
+  %result = udiv <2 x i64> %num.mask, %den.mask
+  ret <2 x i64> %result
+}

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
new file mode 100644
index 000000000000..291b56b6b74b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -0,0 +1,3427 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,CGP %s
+
+; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
+
+define i64 @v_urem_i64(i64 %num, i64 %den) {
+; CHECK-LABEL: v_urem_i64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_or_b32_e32 v5, v1, v3
+; CHECK-NEXT:    v_mov_b32_e32 v4, 0
+; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CHECK-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CHECK-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CHECK-NEXT:    s_cbranch_execnz BB0_2
+; CHECK-NEXT:  ; %bb.1:
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v2
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v3
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v2
+; CHECK-NEXT:    v_subb_u32_e32 v7, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CHECK-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CHECK-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; CHECK-NEXT:    v_trunc_f32_e32 v5, v5
+; CHECK-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v5
+; CHECK-NEXT:    v_mul_lo_u32 v9, v6, v4
+; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v4
+; CHECK-NEXT:    v_mul_hi_u32 v11, v6, v4
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; CHECK-NEXT:    v_mul_lo_u32 v10, v5, v9
+; CHECK-NEXT:    v_mul_hi_u32 v12, v4, v9
+; CHECK-NEXT:    v_mul_hi_u32 v9, v5, v9
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; CHECK-NEXT:    v_mul_lo_u32 v11, v4, v8
+; CHECK-NEXT:    v_mul_lo_u32 v13, v5, v8
+; CHECK-NEXT:    v_mul_hi_u32 v14, v4, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v5, v8
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; CHECK-NEXT:    v_addc_u32_e64 v9, s[4:5], v5, v8, vcc
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v8
+; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v4
+; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v4
+; CHECK-NEXT:    v_mul_hi_u32 v10, v6, v4
+; CHECK-NEXT:    v_mul_lo_u32 v6, v6, v9
+; CHECK-NEXT:    v_mul_lo_u32 v11, v9, v8
+; CHECK-NEXT:    v_mul_hi_u32 v12, v4, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v9, v8
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
+; CHECK-NEXT:    v_mul_lo_u32 v7, v4, v6
+; CHECK-NEXT:    v_mul_lo_u32 v10, v9, v6
+; CHECK-NEXT:    v_mul_hi_u32 v13, v4, v6
+; CHECK-NEXT:    v_mul_hi_u32 v6, v9, v6
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v10, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v7
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v11
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v5, v6, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v6, v1, v4
+; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v4
+; CHECK-NEXT:    v_mul_lo_u32 v8, v0, v5
+; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v5
+; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v1, v5
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v4
+; CHECK-NEXT:    v_mul_lo_u32 v8, v3, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v2, v4
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT:    v_mul_lo_u32 v5, v2, v5
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, v0, v7
+; CHECK-NEXT:    v_subb_u32_e64 v6, s[4:5], v1, v4, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v4
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v5, v2
+; CHECK-NEXT:    v_subbrev_u32_e64 v8, s[4:5], 0, v1, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v11, vcc, v7, v2
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v8, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v10, v9, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v7, v11, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v5, v6, v1, vcc
+; CHECK-NEXT:  BB0_2: ; %Flow
+; CHECK-NEXT:    s_or_saveexec_b64 s[8:9], s[6:7]
+; CHECK-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; CHECK-NEXT:    s_cbranch_execz BB0_4
+; CHECK-NEXT:  ; %bb.3:
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v2
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v1
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_lo_u32 v3, v1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v2
+; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, 0, v3
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v1
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v1, v3
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v2
+; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v2
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v3, v2
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v1
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[6:7], v3, v2
+; CHECK-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, v4, v0, s[4:5]
+; CHECK-NEXT:    v_mov_b32_e32 v5, 0
+; CHECK-NEXT:  BB0_4:
+; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
+; CHECK-NEXT:    v_mov_b32_e32 v0, v4
+; CHECK-NEXT:    v_mov_b32_e32 v1, v5
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %result = urem i64 %num, %den
+  ret i64 %result
+}
+
+; FIXME: This is a workaround for not handling uniform VGPR case.
+declare i32 @llvm.amdgcn.readfirstlane(i32)
+
+define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
+; CHECK-LABEL: s_urem_i64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_or_b64 s[4:5], s[0:1], s[2:3]
+; CHECK-NEXT:    s_mov_b32 s6, 0
+; CHECK-NEXT:    s_mov_b32 s7, -1
+; CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s2
+; CHECK-NEXT:    v_cmp_ne_u64_e64 vcc, s[4:5], 0
+; CHECK-NEXT:    s_cbranch_vccnz BB1_2
+; CHECK-NEXT:  ; %bb.1:
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CHECK-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CHECK-NEXT:    v_mul_lo_u32 v1, v0, s2
+; CHECK-NEXT:    v_mul_hi_u32 v2, v0, s2
+; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], v0, v1
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v0, v0, s0
+; CHECK-NEXT:    v_mul_lo_u32 v0, v0, s2
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, s0, v0
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v1
+; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], s2, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[0:1], s0, v0
+; CHECK-NEXT:    v_subrev_i32_e64 v0, s[2:3], s2, v1
+; CHECK-NEXT:    s_and_b64 vcc, vcc, s[0:1]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[0:1]
+; CHECK-NEXT:    s_branch BB1_3
+; CHECK-NEXT:  BB1_2:
+; CHECK-NEXT:    v_mov_b32_e32 v1, s3
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s3
+; CHECK-NEXT:    s_sub_u32 s6, 0, s2
+; CHECK-NEXT:    s_cselect_b32 s4, 1, 0
+; CHECK-NEXT:    v_mov_b32_e32 v3, s1
+; CHECK-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v2
+; CHECK-NEXT:    s_and_b32 s4, s4, 1
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
+; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
+; CHECK-NEXT:    s_subb_u32 s7, 0, s3
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v0
+; CHECK-NEXT:    v_trunc_f32_e32 v2, v2
+; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v2
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v0
+; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v0
+; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v0
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CHECK-NEXT:    v_mul_lo_u32 v6, v2, v5
+; CHECK-NEXT:    v_mul_hi_u32 v8, v0, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v2, v5
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v4
+; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v4
+; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v2, v4
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v8
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v5
+; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], v2, v4, vcc
+; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], v2, v4
+; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v0
+; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v0
+; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v0
+; CHECK-NEXT:    v_mul_lo_u32 v8, s6, v5
+; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v4
+; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
+; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v6
+; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v6
+; CHECK-NEXT:    v_mul_hi_u32 v11, v0, v6
+; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v6
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v9, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v8, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v9
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v5, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v4, s1, v0
+; CHECK-NEXT:    v_mul_hi_u32 v5, s0, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, s1, v0
+; CHECK-NEXT:    v_mul_lo_u32 v6, s0, v2
+; CHECK-NEXT:    v_mul_lo_u32 v7, s1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v8, s0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, s1, v2
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; CHECK-NEXT:    v_mul_lo_u32 v5, s2, v0
+; CHECK-NEXT:    v_mul_lo_u32 v6, s3, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, s2, v0
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; CHECK-NEXT:    v_mul_lo_u32 v2, s2, v2
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, s0, v5
+; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v0, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[0:1], s1, v0
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[0:1]
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
+; CHECK-NEXT:    v_subb_u32_e32 v0, vcc, v0, v1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
+; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, s2, v2
+; CHECK-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
+; CHECK-NEXT:    v_subrev_i32_e32 v5, vcc, s2, v3
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s3, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v0
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v3, v5, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; CHECK-NEXT:  BB1_3:
+; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
+; CHECK-NEXT:    s_mov_b32 s1, s0
+; CHECK-NEXT:    ; return to shader part epilog
+  %result = urem i64 %num, %den
+  %cast = bitcast i64 %result to <2 x i32>
+  %elt.0 = extractelement <2 x i32> %cast, i32 0
+  %elt.1 = extractelement <2 x i32> %cast, i32 1
+  %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0)
+  %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1)
+  %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0
+  %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1
+  %cast.back = bitcast <2 x i32> %ins.1 to i64
+  ret i64 %cast.back
+}
+
+define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
+; GISEL-LABEL: v_urem_v2i64:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v4
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v5
+; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v9
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
+; GISEL-NEXT:    v_mul_f32_e32 v8, 0x5f7ffffc, v8
+; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v8
+; GISEL-NEXT:    v_trunc_f32_e32 v9, v9
+; GISEL-NEXT:    v_mac_f32_e32 v8, 0xcf800000, v9
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
+; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v4
+; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
+; GISEL-NEXT:    v_mul_lo_u32 v13, v11, v8
+; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v9
+; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v8
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; GISEL-NEXT:    v_mul_lo_u32 v14, v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v13
+; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v12
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; GISEL-NEXT:    v_mul_lo_u32 v15, v9, v13
+; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
+; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v13
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v13
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], v9, v13, vcc
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
+; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v8
+; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v8
+; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v12
+; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v8
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
+; GISEL-NEXT:    v_mul_lo_u32 v11, v12, v13
+; GISEL-NEXT:    v_mul_lo_u32 v14, v8, v10
+; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v13
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v14, v11
+; GISEL-NEXT:    v_mul_lo_u32 v14, v12, v10
+; GISEL-NEXT:    v_mul_hi_u32 v13, v12, v13
+; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v10
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v15
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v10
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v10, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, 0, v9, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v8
+; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v9
+; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v8
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v9
+; GISEL-NEXT:    v_mul_hi_u32 v8, v1, v8
+; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v9
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v8
+; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v8
+; GISEL-NEXT:    v_mul_lo_u32 v9, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v8, v4, v8
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
+; GISEL-NEXT:    v_subb_u32_e64 v9, s[4:5], v1, v8, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v8
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v10, s[4:5], v0, v4
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
+; GISEL-NEXT:    v_subbrev_u32_e64 v11, vcc, 0, v1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v11, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v10, v4
+; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], v1, v5, s[4:5]
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v10, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v11, v1, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v7
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, 0, v6
+; GISEL-NEXT:    v_subb_u32_e32 v9, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v5
+; GISEL-NEXT:    v_mul_hi_u32 v13, v8, v4
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
+; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v10
+; GISEL-NEXT:    v_mul_lo_u32 v13, v4, v11
+; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v10
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; GISEL-NEXT:    v_mul_lo_u32 v13, v5, v11
+; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v10
+; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v11
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v11
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v5, v11, vcc
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v11
+; GISEL-NEXT:    v_mul_lo_u32 v11, v8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v10
+; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v4
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
+; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v11
+; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v11
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
+; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
+; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v11
+; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v8
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v9
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v5, vcc, 0, v5, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v5
+; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v5
+; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, v7, v4
+; GISEL-NEXT:    v_mul_lo_u32 v5, v6, v5
+; GISEL-NEXT:    v_mul_hi_u32 v4, v6, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
+; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], v3, v4, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v8, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], v2, v6
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    v_subbrev_u32_e64 v9, vcc, 0, v3, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v8, v6
+; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v7, s[4:5]
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e32 v6, v8, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_urem_v2i64:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    v_mov_b32_e32 v8, v0
+; CGP-NEXT:    v_mov_b32_e32 v9, v1
+; CGP-NEXT:    v_or_b32_e32 v1, v9, v5
+; CGP-NEXT:    v_mov_b32_e32 v0, 0
+; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execnz BB2_2
+; CGP-NEXT:  ; %bb.1:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
+; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v5
+; CGP-NEXT:    v_sub_i32_e32 v10, vcc, 0, v4
+; CGP-NEXT:    v_subb_u32_e32 v11, vcc, 0, v5, vcc
+; CGP-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
+; CGP-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
+; CGP-NEXT:    v_trunc_f32_e32 v1, v1
+; CGP-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; CGP-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v10, v1
+; CGP-NEXT:    v_mul_lo_u32 v13, v10, v0
+; CGP-NEXT:    v_mul_lo_u32 v14, v11, v0
+; CGP-NEXT:    v_mul_hi_u32 v15, v10, v0
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
+; CGP-NEXT:    v_mul_lo_u32 v14, v1, v13
+; CGP-NEXT:    v_mul_hi_u32 v16, v0, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v1, v13
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; CGP-NEXT:    v_mul_lo_u32 v15, v0, v12
+; CGP-NEXT:    v_mul_lo_u32 v17, v1, v12
+; CGP-NEXT:    v_mul_hi_u32 v18, v0, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v1, v12
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v17, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v17, v16
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
+; CGP-NEXT:    v_addc_u32_e64 v13, s[4:5], v1, v12, vcc
+; CGP-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v12
+; CGP-NEXT:    v_mul_lo_u32 v12, v10, v0
+; CGP-NEXT:    v_mul_lo_u32 v11, v11, v0
+; CGP-NEXT:    v_mul_hi_u32 v14, v10, v0
+; CGP-NEXT:    v_mul_lo_u32 v10, v10, v13
+; CGP-NEXT:    v_mul_lo_u32 v15, v13, v12
+; CGP-NEXT:    v_mul_hi_u32 v16, v0, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v13, v12
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v14
+; CGP-NEXT:    v_mul_lo_u32 v11, v0, v10
+; CGP-NEXT:    v_mul_lo_u32 v14, v13, v10
+; CGP-NEXT:    v_mul_hi_u32 v17, v0, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v13, v10
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v15, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v15
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v12
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v10, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v11
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v10, v9, v0
+; CGP-NEXT:    v_mul_hi_u32 v11, v8, v0
+; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v8, v1
+; CGP-NEXT:    v_mul_lo_u32 v13, v9, v1
+; CGP-NEXT:    v_mul_hi_u32 v14, v8, v1
+; CGP-NEXT:    v_mul_hi_u32 v1, v9, v1
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT:    v_mul_lo_u32 v11, v4, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v5, v0
+; CGP-NEXT:    v_mul_hi_u32 v0, v4, v0
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v10
+; CGP-NEXT:    v_mul_lo_u32 v1, v4, v1
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v12, v1
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v8, v11
+; CGP-NEXT:    v_subb_u32_e64 v10, s[4:5], v9, v0, vcc
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v9, v0
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; CGP-NEXT:    v_subb_u32_e32 v0, vcc, v0, v5, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v9, v11, v9, vcc
+; CGP-NEXT:    v_sub_i32_e32 v11, vcc, v1, v4
+; CGP-NEXT:    v_subbrev_u32_e64 v12, s[4:5], 0, v0, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
+; CGP-NEXT:    v_subb_u32_e32 v0, vcc, v0, v5, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v12, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, -1, vcc
+; CGP-NEXT:    v_sub_i32_e32 v15, vcc, v11, v4
+; CGP-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v12, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v14, v13, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v11, v15, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v11, v12, v0, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v1, v5, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v10, v11, vcc
+; CGP-NEXT:  BB2_2: ; %Flow1
+; CGP-NEXT:    s_or_saveexec_b64 s[8:9], s[6:7]
+; CGP-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; CGP-NEXT:    s_cbranch_execz BB2_4
+; CGP-NEXT:  ; %bb.3:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
+; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_lo_u32 v1, v0, v4
+; CGP-NEXT:    v_mul_hi_u32 v5, v0, v4
+; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v1
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
+; CGP-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v0, v1
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; CGP-NEXT:    v_mul_hi_u32 v0, v0, v8
+; CGP-NEXT:    v_mul_lo_u32 v0, v0, v4
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v8, v0
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v4
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v1, v4
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v0
+; CGP-NEXT:    v_sub_i32_e64 v0, s[6:7], v1, v4
+; CGP-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
+; CGP-NEXT:    v_mov_b32_e32 v1, 0
+; CGP-NEXT:  BB2_4:
+; CGP-NEXT:    s_or_b64 exec, exec, s[8:9]
+; CGP-NEXT:    v_or_b32_e32 v5, v3, v7
+; CGP-NEXT:    v_mov_b32_e32 v4, 0
+; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execnz BB2_6
+; CGP-NEXT:  ; %bb.5:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v6
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v7
+; CGP-NEXT:    v_sub_i32_e32 v8, vcc, 0, v6
+; CGP-NEXT:    v_subb_u32_e32 v9, vcc, 0, v7, vcc
+; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; CGP-NEXT:    v_trunc_f32_e32 v5, v5
+; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, v8, v5
+; CGP-NEXT:    v_mul_lo_u32 v11, v8, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, v9, v4
+; CGP-NEXT:    v_mul_hi_u32 v13, v8, v4
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; CGP-NEXT:    v_mul_lo_u32 v12, v5, v11
+; CGP-NEXT:    v_mul_hi_u32 v14, v4, v11
+; CGP-NEXT:    v_mul_hi_u32 v11, v5, v11
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
+; CGP-NEXT:    v_mul_lo_u32 v13, v4, v10
+; CGP-NEXT:    v_mul_lo_u32 v15, v5, v10
+; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v5, v10
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v15, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v14
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
+; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], v5, v10, vcc
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v10
+; CGP-NEXT:    v_mul_lo_u32 v10, v8, v4
+; CGP-NEXT:    v_mul_lo_u32 v9, v9, v4
+; CGP-NEXT:    v_mul_hi_u32 v12, v8, v4
+; CGP-NEXT:    v_mul_lo_u32 v8, v8, v11
+; CGP-NEXT:    v_mul_lo_u32 v13, v11, v10
+; CGP-NEXT:    v_mul_hi_u32 v14, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v11, v10
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v12
+; CGP-NEXT:    v_mul_lo_u32 v9, v4, v8
+; CGP-NEXT:    v_mul_lo_u32 v12, v11, v8
+; CGP-NEXT:    v_mul_hi_u32 v15, v4, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v11, v8
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v13, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v13
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
+; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; CGP-NEXT:    v_mul_lo_u32 v8, v3, v4
+; CGP-NEXT:    v_mul_hi_u32 v9, v2, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, v2, v5
+; CGP-NEXT:    v_mul_lo_u32 v11, v3, v5
+; CGP-NEXT:    v_mul_hi_u32 v12, v2, v5
+; CGP-NEXT:    v_mul_hi_u32 v5, v3, v5
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CGP-NEXT:    v_mul_lo_u32 v9, v6, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, v7, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v6, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; CGP-NEXT:    v_mul_lo_u32 v5, v6, v5
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v2, v9
+; CGP-NEXT:    v_subb_u32_e64 v8, s[4:5], v3, v4, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v8, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v9, v4, vcc
+; CGP-NEXT:    v_sub_i32_e32 v9, vcc, v5, v6
+; CGP-NEXT:    v_subbrev_u32_e64 v10, s[4:5], 0, v3, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; CGP-NEXT:    v_sub_i32_e32 v13, vcc, v9, v6
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v7, v12, v11, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v7, v9, v13, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v10, v3, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v5, v7, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v8, v3, vcc
+; CGP-NEXT:  BB2_6: ; %Flow
+; CGP-NEXT:    s_or_saveexec_b64 s[8:9], s[6:7]
+; CGP-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; CGP-NEXT:    s_cbranch_execz BB2_8
+; CGP-NEXT:  ; %bb.7:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v6
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_lo_u32 v4, v3, v6
+; CGP-NEXT:    v_mul_hi_u32 v5, v3, v6
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v4
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
+; CGP-NEXT:    v_mul_hi_u32 v4, v4, v3
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CGP-NEXT:    v_mul_hi_u32 v3, v3, v2
+; CGP-NEXT:    v_mul_lo_u32 v3, v3, v6
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v2, v3
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v6
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v4, v6
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v3
+; CGP-NEXT:    v_sub_i32_e64 v2, s[6:7], v4, v6
+; CGP-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v4, v5, v2, s[4:5]
+; CGP-NEXT:    v_mov_b32_e32 v5, 0
+; CGP-NEXT:  BB2_8:
+; CGP-NEXT:    s_or_b64 exec, exec, s[8:9]
+; CGP-NEXT:    v_mov_b32_e32 v2, v4
+; CGP-NEXT:    v_mov_b32_e32 v3, v5
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %result = urem <2 x i64> %num, %den
+  ret <2 x i64> %result
+}
+
+; Test i64 urem by a constant power-of-two denominator (4096 = 0x1000 = 2^12).
+; The CHECK lines below appear to be autogenerated (update_llc_test_checks.py
+; style) — do not edit them by hand; regenerate instead. Note the constant is
+; still expanded through the full reciprocal-based 64-bit division sequence
+; (s_movk_i32 0x1000 feeds the generic expansion) rather than a shift/mask,
+; matching the commit's note that this lowering is not yet optimized.
+define i64 @v_urem_i64_pow2k_denom(i64 %num) {
+; CHECK-LABEL: v_urem_i64_pow2k_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_movk_i32 s6, 0x1000
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0
+; CHECK-NEXT:    s_mov_b32 s7, 0xfffff000
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s6
+; CHECK-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v2
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v3
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
+; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
+; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v3
+; CHECK-NEXT:    v_mul_lo_u32 v5, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v5
+; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v5
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v4
+; CHECK-NEXT:    v_mul_lo_u32 v9, v3, v4
+; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v3, v4
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v8
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], v3, v4, vcc
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v4
+; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, s7, v5
+; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v4
+; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
+; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v6
+; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v6
+; CHECK-NEXT:    v_mul_hi_u32 v11, v2, v6
+; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v6
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v9, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v8, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v9
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v4, v1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v5, v0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, v0, v3
+; CHECK-NEXT:    v_mul_lo_u32 v7, v1, v3
+; CHECK-NEXT:    v_mul_hi_u32 v8, v0, v3
+; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, 0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, s6, v2
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
+; CHECK-NEXT:    v_mul_lo_u32 v3, s6, v3
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
+; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v1, v2, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v2
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; CHECK-NEXT:    v_subrev_i32_e32 v4, vcc, s6, v0
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
+; CHECK-NEXT:    v_subrev_i32_e32 v7, vcc, s6, v4
+; CHECK-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  ; 4096 == 0x1000 == 2^12; this is the constant loaded via s_movk_i32 above.
+  %result = urem i64 %num, 4096
+  ret i64 %result
+}
+
+define <2 x i64> @v_urem_v2i64_pow2k_denom(<2 x i64> %num) {
+; GISEL-LABEL: v_urem_v2i64_pow2k_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_movk_i32 s10, 0x1000
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s10
+; GISEL-NEXT:    s_sub_u32 s8, 0, s10
+; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, 0
+; GISEL-NEXT:    v_mov_b32_e32 v6, v4
+; GISEL-NEXT:    s_and_b32 s4, s4, 1
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v6
+; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
+; GISEL-NEXT:    s_subb_u32 s9, 0, 0
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; GISEL-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
+; GISEL-NEXT:    s_sub_u32 s11, 0, s10
+; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
+; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
+; GISEL-NEXT:    v_trunc_f32_e32 v6, v6
+; GISEL-NEXT:    s_and_b32 s4, s4, 1
+; GISEL-NEXT:    v_trunc_f32_e32 v7, v7
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
+; GISEL-NEXT:    s_subb_u32 s6, 0, 0
+; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v7
+; GISEL-NEXT:    v_mul_lo_u32 v10, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
+; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v13, s8, v5
+; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
+; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
+; GISEL-NEXT:    v_mul_lo_u32 v11, v6, v10
+; GISEL-NEXT:    v_mul_hi_u32 v16, v4, v10
+; GISEL-NEXT:    v_mul_hi_u32 v10, v6, v10
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
+; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v13
+; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v13
+; GISEL-NEXT:    v_mul_hi_u32 v13, v7, v13
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
+; GISEL-NEXT:    v_mul_lo_u32 v15, v6, v8
+; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v6, v8
+; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v9
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v9
+; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
+; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
+; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
+; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v9
+; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v5
+; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
+; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
+; GISEL-NEXT:    v_mul_lo_u32 v16, s11, v10
+; GISEL-NEXT:    v_mul_lo_u32 v17, v10, v8
+; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
+; GISEL-NEXT:    v_mul_lo_u32 v19, s8, v13
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
+; GISEL-NEXT:    v_mul_lo_u32 v16, v13, v9
+; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v14, v19
+; GISEL-NEXT:    v_mul_hi_u32 v19, v5, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v9
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
+; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v14, v15
+; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v11
+; GISEL-NEXT:    v_mul_lo_u32 v15, v5, v12
+; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v19
+; GISEL-NEXT:    v_mul_lo_u32 v15, v10, v11
+; GISEL-NEXT:    v_mul_hi_u32 v19, v4, v11
+; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v11
+; GISEL-NEXT:    v_mul_lo_u32 v11, v13, v12
+; GISEL-NEXT:    v_mul_hi_u32 v13, v13, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v5, v12
+; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v17, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[8:9], v15, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[8:9], v11, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v18
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[8:9], v8, v19
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v17, v14
+; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v18
+; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v19
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v12
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v14
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v13, v11
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v7, vcc, v7, v11, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
+; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v5
+; GISEL-NEXT:    v_mul_hi_u32 v11, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
+; GISEL-NEXT:    v_mul_lo_u32 v12, v2, v6
+; GISEL-NEXT:    v_mul_lo_u32 v13, v3, v6
+; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
+; GISEL-NEXT:    v_mul_lo_u32 v15, v0, v7
+; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v7
+; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; GISEL-NEXT:    v_mul_lo_u32 v10, s10, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, s10, v4
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_mul_lo_u32 v11, s10, v5
+; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, s10, v5
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_mul_lo_u32 v6, s10, v6
+; GISEL-NEXT:    v_mul_lo_u32 v7, s10, v7
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
+; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s10, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
+; GISEL-NEXT:    v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v5
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], s10, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[6:7]
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[6:7]
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
+; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
+; GISEL-NEXT:    v_subrev_i32_e32 v8, vcc, s10, v2
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc
+; GISEL-NEXT:    v_subrev_i32_e32 v9, vcc, s10, v0
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; GISEL-NEXT:    v_subrev_i32_e32 v13, vcc, s10, v8
+; GISEL-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
+; GISEL-NEXT:    v_subrev_i32_e32 v16, vcc, s10, v9
+; GISEL-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; GISEL-NEXT:    v_cndmask_b32_e32 v11, v15, v11, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v13, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, v16, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v14, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v9, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_urem_v2i64_pow2k_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_movk_i32 s10, 0x1000
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, s10
+; CGP-NEXT:    s_mov_b32 s8, 0xfffff000
+; CGP-NEXT:    v_mov_b32_e32 v6, v5
+; CGP-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v4
+; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v5
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v6
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; CGP-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
+; CGP-NEXT:    v_trunc_f32_e32 v6, v6
+; CGP-NEXT:    v_trunc_f32_e32 v7, v7
+; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
+; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT:    v_mul_lo_u32 v8, s8, v6
+; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT:    v_mul_lo_u32 v9, s8, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, s8, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
+; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
+; CGP-NEXT:    v_mul_lo_u32 v13, s8, v5
+; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
+; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
+; CGP-NEXT:    v_mul_lo_u32 v11, v6, v10
+; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v6, v10
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
+; CGP-NEXT:    v_mul_lo_u32 v14, v7, v13
+; CGP-NEXT:    v_mul_hi_u32 v17, v5, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v7, v13
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; CGP-NEXT:    v_mul_lo_u32 v12, v4, v8
+; CGP-NEXT:    v_mul_lo_u32 v15, v6, v8
+; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
+; CGP-NEXT:    v_mul_lo_u32 v19, v5, v9
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
+; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; CGP-NEXT:    v_mul_lo_u32 v14, v7, v9
+; CGP-NEXT:    v_mul_hi_u32 v17, v5, v9
+; CGP-NEXT:    v_mul_hi_u32 v9, v7, v9
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
+; CGP-NEXT:    v_mul_lo_u32 v8, s8, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
+; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
+; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v9
+; CGP-NEXT:    v_mul_lo_u32 v9, s8, v5
+; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
+; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
+; CGP-NEXT:    v_mul_lo_u32 v16, s8, v10
+; CGP-NEXT:    v_mul_lo_u32 v17, v10, v8
+; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v10, v8
+; CGP-NEXT:    v_mul_lo_u32 v19, s8, v13
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
+; CGP-NEXT:    v_mul_lo_u32 v16, v13, v9
+; CGP-NEXT:    v_add_i32_e64 v14, s[6:7], v14, v19
+; CGP-NEXT:    v_mul_hi_u32 v19, v5, v9
+; CGP-NEXT:    v_mul_hi_u32 v9, v13, v9
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
+; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v14, v15
+; CGP-NEXT:    v_mul_lo_u32 v14, v4, v11
+; CGP-NEXT:    v_mul_lo_u32 v15, v5, v12
+; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v19
+; CGP-NEXT:    v_mul_lo_u32 v15, v10, v11
+; CGP-NEXT:    v_mul_hi_u32 v19, v4, v11
+; CGP-NEXT:    v_mul_hi_u32 v10, v10, v11
+; CGP-NEXT:    v_mul_lo_u32 v11, v13, v12
+; CGP-NEXT:    v_mul_hi_u32 v13, v13, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v5, v12
+; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v17, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
+; CGP-NEXT:    v_add_i32_e64 v8, s[8:9], v15, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
+; CGP-NEXT:    v_add_i32_e64 v9, s[8:9], v11, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[8:9]
+; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
+; CGP-NEXT:    v_add_i32_e64 v8, s[8:9], v8, v19
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
+; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v14, s[6:7], v17, v14
+; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v18
+; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v19
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
+; CGP-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v12
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v14
+; CGP-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v13, v11
+; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
+; CGP-NEXT:    v_addc_u32_e64 v7, vcc, v7, v11, s[4:5]
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; CGP-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
+; CGP-NEXT:    v_mul_lo_u32 v8, v3, v4
+; CGP-NEXT:    v_mul_hi_u32 v10, v2, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
+; CGP-NEXT:    v_mul_lo_u32 v9, v1, v5
+; CGP-NEXT:    v_mul_hi_u32 v11, v0, v5
+; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
+; CGP-NEXT:    v_mul_lo_u32 v12, v2, v6
+; CGP-NEXT:    v_mul_lo_u32 v13, v3, v6
+; CGP-NEXT:    v_mul_hi_u32 v14, v2, v6
+; CGP-NEXT:    v_mul_hi_u32 v6, v3, v6
+; CGP-NEXT:    v_mul_lo_u32 v15, v0, v7
+; CGP-NEXT:    v_mul_lo_u32 v16, v1, v7
+; CGP-NEXT:    v_mul_hi_u32 v17, v0, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT:    v_mul_lo_u32 v10, s10, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, 0, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, s10, v4
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT:    v_mul_lo_u32 v11, s10, v5
+; CGP-NEXT:    v_mul_lo_u32 v13, 0, v5
+; CGP-NEXT:    v_mul_hi_u32 v5, s10, v5
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT:    v_mul_lo_u32 v6, s10, v6
+; CGP-NEXT:    v_mul_lo_u32 v7, s10, v7
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
+; CGP-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s10, v2
+; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
+; CGP-NEXT:    v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5]
+; CGP-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v5
+; CGP-NEXT:    v_cmp_le_u32_e64 s[6:7], s10, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[6:7]
+; CGP-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[6:7]
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
+; CGP-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
+; CGP-NEXT:    v_subrev_i32_e32 v8, vcc, s10, v2
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s10, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc
+; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, s10, v0
+; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s10, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; CGP-NEXT:    v_subrev_i32_e32 v13, vcc, s10, v8
+; CGP-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
+; CGP-NEXT:    v_subrev_i32_e32 v16, vcc, s10, v9
+; CGP-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v11, v15, v11, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; CGP-NEXT:    v_cndmask_b32_e32 v8, v8, v13, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v9, v9, v16, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v14, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v0, v9, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %result = urem <2 x i64> %num, <i64 4096, i64 4096>
+  ret <2 x i64> %result
+}
+
+define i64 @v_urem_i64_oddk_denom(i64 %num) {
+; CHECK-LABEL: v_urem_i64_oddk_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0
+; CHECK-NEXT:    s_mov_b32 s7, 0xffed2705
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s6
+; CHECK-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v2
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v3
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
+; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
+; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v3
+; CHECK-NEXT:    v_mul_lo_u32 v5, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v5
+; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v5
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v4
+; CHECK-NEXT:    v_mul_lo_u32 v9, v3, v4
+; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v3, v4
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v8
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], v3, v4, vcc
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v4
+; CHECK-NEXT:    v_mul_lo_u32 v4, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, s7, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, s7, v5
+; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v4
+; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
+; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v6
+; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v6
+; CHECK-NEXT:    v_mul_hi_u32 v11, v2, v6
+; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v6
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v9, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v8, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v9
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v4, v1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v5, v0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, v0, v3
+; CHECK-NEXT:    v_mul_lo_u32 v7, v1, v3
+; CHECK-NEXT:    v_mul_hi_u32 v8, v0, v3
+; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, 0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, s6, v2
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
+; CHECK-NEXT:    v_mul_lo_u32 v3, s6, v3
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
+; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v1, v2, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v2
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; CHECK-NEXT:    v_subrev_i32_e32 v4, vcc, s6, v0
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
+; CHECK-NEXT:    v_subrev_i32_e32 v7, vcc, s6, v4
+; CHECK-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %result = urem i64 %num, 1235195
+  ret i64 %result
+}
+
+define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
+; GISEL-LABEL: v_urem_v2i64_oddk_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s10, 0x12d8fb
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s10
+; GISEL-NEXT:    s_sub_u32 s8, 0, s10
+; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, 0
+; GISEL-NEXT:    v_mov_b32_e32 v6, v4
+; GISEL-NEXT:    s_and_b32 s4, s4, 1
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v6
+; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
+; GISEL-NEXT:    s_subb_u32 s9, 0, 0
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; GISEL-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
+; GISEL-NEXT:    s_sub_u32 s11, 0, s10
+; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
+; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
+; GISEL-NEXT:    v_trunc_f32_e32 v6, v6
+; GISEL-NEXT:    s_and_b32 s4, s4, 1
+; GISEL-NEXT:    v_trunc_f32_e32 v7, v7
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
+; GISEL-NEXT:    s_subb_u32 s6, 0, 0
+; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v7
+; GISEL-NEXT:    v_mul_lo_u32 v10, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
+; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v13, s8, v5
+; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
+; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
+; GISEL-NEXT:    v_mul_lo_u32 v11, v6, v10
+; GISEL-NEXT:    v_mul_hi_u32 v16, v4, v10
+; GISEL-NEXT:    v_mul_hi_u32 v10, v6, v10
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
+; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v13
+; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v13
+; GISEL-NEXT:    v_mul_hi_u32 v13, v7, v13
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
+; GISEL-NEXT:    v_mul_lo_u32 v15, v6, v8
+; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v6, v8
+; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v9
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v9
+; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
+; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
+; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
+; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
+; GISEL-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v9
+; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v5
+; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
+; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
+; GISEL-NEXT:    v_mul_lo_u32 v16, s11, v10
+; GISEL-NEXT:    v_mul_lo_u32 v17, v10, v8
+; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
+; GISEL-NEXT:    v_mul_lo_u32 v19, s8, v13
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
+; GISEL-NEXT:    v_mul_lo_u32 v16, v13, v9
+; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v14, v19
+; GISEL-NEXT:    v_mul_hi_u32 v19, v5, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v9
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
+; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v14, v15
+; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v11
+; GISEL-NEXT:    v_mul_lo_u32 v15, v5, v12
+; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v19
+; GISEL-NEXT:    v_mul_lo_u32 v15, v10, v11
+; GISEL-NEXT:    v_mul_hi_u32 v19, v4, v11
+; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v11
+; GISEL-NEXT:    v_mul_lo_u32 v11, v13, v12
+; GISEL-NEXT:    v_mul_hi_u32 v13, v13, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v5, v12
+; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v17, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[8:9], v15, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[8:9], v11, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v18
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v8, s[8:9], v8, v19
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v17, v14
+; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v18
+; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v19
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v12
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v14
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v13, v11
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v7, vcc, v7, v11, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
+; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v5
+; GISEL-NEXT:    v_mul_hi_u32 v11, v0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
+; GISEL-NEXT:    v_mul_lo_u32 v12, v2, v6
+; GISEL-NEXT:    v_mul_lo_u32 v13, v3, v6
+; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
+; GISEL-NEXT:    v_mul_lo_u32 v15, v0, v7
+; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v7
+; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v7
+; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; GISEL-NEXT:    v_mul_lo_u32 v10, s10, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v4
+; GISEL-NEXT:    v_mul_hi_u32 v4, s10, v4
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_mul_lo_u32 v11, s10, v5
+; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, s10, v5
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; GISEL-NEXT:    v_mul_lo_u32 v6, s10, v6
+; GISEL-NEXT:    v_mul_lo_u32 v7, s10, v7
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
+; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s10, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
+; GISEL-NEXT:    v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v5
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], s10, v0
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[6:7]
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[6:7]
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
+; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
+; GISEL-NEXT:    v_subrev_i32_e32 v8, vcc, s10, v2
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc
+; GISEL-NEXT:    v_subrev_i32_e32 v9, vcc, s10, v0
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; GISEL-NEXT:    v_subrev_i32_e32 v13, vcc, s10, v8
+; GISEL-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
+; GISEL-NEXT:    v_subrev_i32_e32 v16, vcc, s10, v9
+; GISEL-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; GISEL-NEXT:    v_cndmask_b32_e32 v11, v15, v11, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v13, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, v16, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v14, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v9, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_urem_v2i64_oddk_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_mov_b32 s10, 0x12d8fb
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, s10
+; CGP-NEXT:    s_mov_b32 s8, 0xffed2705
+; CGP-NEXT:    v_mov_b32_e32 v6, v5
+; CGP-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v4
+; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v5
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v6
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; CGP-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
+; CGP-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
+; CGP-NEXT:    v_trunc_f32_e32 v6, v6
+; CGP-NEXT:    v_trunc_f32_e32 v7, v7
+; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
+; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT:    v_mul_lo_u32 v8, s8, v6
+; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT:    v_mul_lo_u32 v9, s8, v7
+; CGP-NEXT:    v_mul_lo_u32 v10, s8, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
+; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
+; CGP-NEXT:    v_mul_lo_u32 v13, s8, v5
+; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
+; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
+; CGP-NEXT:    v_mul_lo_u32 v11, v6, v10
+; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v6, v10
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
+; CGP-NEXT:    v_mul_lo_u32 v14, v7, v13
+; CGP-NEXT:    v_mul_hi_u32 v17, v5, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v7, v13
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; CGP-NEXT:    v_mul_lo_u32 v12, v4, v8
+; CGP-NEXT:    v_mul_lo_u32 v15, v6, v8
+; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
+; CGP-NEXT:    v_mul_lo_u32 v19, v5, v9
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
+; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; CGP-NEXT:    v_mul_lo_u32 v14, v7, v9
+; CGP-NEXT:    v_mul_hi_u32 v17, v5, v9
+; CGP-NEXT:    v_mul_hi_u32 v9, v7, v9
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
+; CGP-NEXT:    v_mul_lo_u32 v8, s8, v4
+; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
+; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
+; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v9
+; CGP-NEXT:    v_mul_lo_u32 v9, s8, v5
+; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
+; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
+; CGP-NEXT:    v_mul_lo_u32 v16, s8, v10
+; CGP-NEXT:    v_mul_lo_u32 v17, v10, v8
+; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
+; CGP-NEXT:    v_mul_hi_u32 v8, v10, v8
+; CGP-NEXT:    v_mul_lo_u32 v19, s8, v13
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
+; CGP-NEXT:    v_mul_lo_u32 v16, v13, v9
+; CGP-NEXT:    v_add_i32_e64 v14, s[6:7], v14, v19
+; CGP-NEXT:    v_mul_hi_u32 v19, v5, v9
+; CGP-NEXT:    v_mul_hi_u32 v9, v13, v9
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
+; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v14, v15
+; CGP-NEXT:    v_mul_lo_u32 v14, v4, v11
+; CGP-NEXT:    v_mul_lo_u32 v15, v5, v12
+; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v19
+; CGP-NEXT:    v_mul_lo_u32 v15, v10, v11
+; CGP-NEXT:    v_mul_hi_u32 v19, v4, v11
+; CGP-NEXT:    v_mul_hi_u32 v10, v10, v11
+; CGP-NEXT:    v_mul_lo_u32 v11, v13, v12
+; CGP-NEXT:    v_mul_hi_u32 v13, v13, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v5, v12
+; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v17, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
+; CGP-NEXT:    v_add_i32_e64 v8, s[8:9], v15, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
+; CGP-NEXT:    v_add_i32_e64 v9, s[8:9], v11, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[8:9]
+; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
+; CGP-NEXT:    v_add_i32_e64 v8, s[8:9], v8, v19
+; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
+; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v14, s[6:7], v17, v14
+; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v18
+; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v19
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
+; CGP-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
+; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v12
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v14
+; CGP-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
+; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v13, v11
+; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
+; CGP-NEXT:    v_addc_u32_e64 v7, vcc, v7, v11, s[4:5]
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; CGP-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
+; CGP-NEXT:    v_mul_lo_u32 v8, v3, v4
+; CGP-NEXT:    v_mul_hi_u32 v10, v2, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; CGP-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
+; CGP-NEXT:    v_mul_lo_u32 v9, v1, v5
+; CGP-NEXT:    v_mul_hi_u32 v11, v0, v5
+; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
+; CGP-NEXT:    v_mul_lo_u32 v12, v2, v6
+; CGP-NEXT:    v_mul_lo_u32 v13, v3, v6
+; CGP-NEXT:    v_mul_hi_u32 v14, v2, v6
+; CGP-NEXT:    v_mul_hi_u32 v6, v3, v6
+; CGP-NEXT:    v_mul_lo_u32 v15, v0, v7
+; CGP-NEXT:    v_mul_lo_u32 v16, v1, v7
+; CGP-NEXT:    v_mul_hi_u32 v17, v0, v7
+; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT:    v_mul_lo_u32 v10, s10, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, 0, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, s10, v4
+; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT:    v_mul_lo_u32 v11, s10, v5
+; CGP-NEXT:    v_mul_lo_u32 v13, 0, v5
+; CGP-NEXT:    v_mul_hi_u32 v5, s10, v5
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT:    v_mul_lo_u32 v6, s10, v6
+; CGP-NEXT:    v_mul_lo_u32 v7, s10, v7
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
+; CGP-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s10, v2
+; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
+; CGP-NEXT:    v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5]
+; CGP-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v5
+; CGP-NEXT:    v_cmp_le_u32_e64 s[6:7], s10, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[6:7]
+; CGP-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[6:7]
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
+; CGP-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
+; CGP-NEXT:    v_subrev_i32_e32 v8, vcc, s10, v2
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s10, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc
+; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, s10, v0
+; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s10, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; CGP-NEXT:    v_subrev_i32_e32 v13, vcc, s10, v8
+; CGP-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
+; CGP-NEXT:    v_subrev_i32_e32 v16, vcc, s10, v9
+; CGP-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v1, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CGP-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v11, v15, v11, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; CGP-NEXT:    v_cndmask_b32_e32 v8, v8, v13, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v9, v9, v16, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v14, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v0, v9, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %result = urem <2 x i64> %num, <i64 1235195, i64 1235195>
+  ret <2 x i64> %result
+}
+
+; 64-bit urem where the divisor is a constant power of two shifted by a
+; runtime amount (4096 << %y): the shift amount is unknown, so the generic
+; 64-bit expansion is required.  The lowering first ORs the high halves of
+; the dividend and divisor and compares the result against zero
+; (v_or_b32 / v_cmp_eq_u64 below): when both high halves are zero it takes
+; the 32-bit reciprocal path (%bb.3), otherwise the full 64-bit
+; reciprocal/Newton-iteration expansion (%bb.1).
+;
+; NOTE(review): all CHECK lines in this function are autogenerated
+; (update_llc_test_checks.py) -- regenerate rather than hand-edit.
+define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) {
+; CHECK-LABEL: v_urem_i64_pow2_shl_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_movk_i32 s4, 0x1000
+; CHECK-NEXT:    s_mov_b32 s5, 0
+; CHECK-NEXT:    v_mov_b32_e32 v6, 0
+; CHECK-NEXT:    v_lshl_b64 v[4:5], s[4:5], v2
+; CHECK-NEXT:    v_or_b32_e32 v7, v1, v5
+; CHECK-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[6:7]
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CHECK-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CHECK-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CHECK-NEXT:    s_cbranch_execnz BB7_2
+; CHECK-NEXT:  ; %bb.1:
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v4
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v5
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
+; CHECK-NEXT:    v_subb_u32_e32 v7, vcc, 0, v5, vcc
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
+; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
+; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
+; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v3
+; CHECK-NEXT:    v_mul_lo_u32 v9, v6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v2
+; CHECK-NEXT:    v_mul_hi_u32 v11, v6, v2
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
+; CHECK-NEXT:    v_mul_lo_u32 v10, v3, v9
+; CHECK-NEXT:    v_mul_hi_u32 v12, v2, v9
+; CHECK-NEXT:    v_mul_hi_u32 v9, v3, v9
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; CHECK-NEXT:    v_mul_lo_u32 v11, v2, v8
+; CHECK-NEXT:    v_mul_lo_u32 v13, v3, v8
+; CHECK-NEXT:    v_mul_hi_u32 v14, v2, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v3, v8
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
+; CHECK-NEXT:    v_addc_u32_e64 v9, s[4:5], v3, v8, vcc
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v8
+; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v2
+; CHECK-NEXT:    v_mul_hi_u32 v10, v6, v2
+; CHECK-NEXT:    v_mul_lo_u32 v6, v6, v9
+; CHECK-NEXT:    v_mul_lo_u32 v11, v9, v8
+; CHECK-NEXT:    v_mul_hi_u32 v12, v2, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v9, v8
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
+; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v6
+; CHECK-NEXT:    v_mul_lo_u32 v10, v9, v6
+; CHECK-NEXT:    v_mul_hi_u32 v13, v2, v6
+; CHECK-NEXT:    v_mul_hi_u32 v6, v9, v6
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v10, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v7
+; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v11
+; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
+; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v6, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v6, v1, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, v0, v3
+; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v3
+; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v3
+; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v9, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_mul_lo_u32 v7, v4, v2
+; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v2
+; CHECK-NEXT:    v_mul_hi_u32 v2, v4, v2
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
+; CHECK-NEXT:    v_mul_lo_u32 v3, v4, v3
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v8, v3
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v7
+; CHECK-NEXT:    v_subb_u32_e64 v6, s[4:5], v1, v2, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v2
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v3, v4
+; CHECK-NEXT:    v_subbrev_u32_e64 v8, s[4:5], 0, v1, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v11, vcc, v7, v4
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v8, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v5, v10, v9, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v5, v7, v11, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v3, v5, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v6, v1, vcc
+; CHECK-NEXT:  BB7_2: ; %Flow
+; CHECK-NEXT:    s_or_saveexec_b64 s[8:9], s[6:7]
+; CHECK-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; CHECK-NEXT:    s_cbranch_execz BB7_4
+; CHECK-NEXT:  ; %bb.3:
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v4
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v1
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; CHECK-NEXT:    v_mul_lo_u32 v2, v1, v4
+; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v4
+; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v2, v2, v1
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v1, v2
+; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v4
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
+; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v2, v4
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v1
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[6:7], v2, v4
+; CHECK-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, v3, v0, s[4:5]
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0
+; CHECK-NEXT:  BB7_4:
+; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
+; CHECK-NEXT:    v_mov_b32_e32 v0, v2
+; CHECK-NEXT:    v_mov_b32_e32 v1, v3
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  ; Divisor 4096 << %y is non-constant, so urem cannot be strength-reduced
+  ; to a mask; the 32-bit fast path in %bb.3 produces a zero high half
+  ; (v_mov_b32_e32 v3, 0 above).
+  %shl.y = shl i64 4096, %y
+  %r = urem i64 %x, %shl.y
+  ret i64 %r
+}
+
+define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
+; GISEL-LABEL: v_urem_v2i64_pow2_shl_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_movk_i32 s4, 0x1000
+; GISEL-NEXT:    s_mov_b32 s5, 0
+; GISEL-NEXT:    v_lshl_b64 v[4:5], s[4:5], v4
+; GISEL-NEXT:    v_lshl_b64 v[6:7], s[4:5], v6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v4
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v5
+; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v9
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
+; GISEL-NEXT:    v_mul_f32_e32 v8, 0x5f7ffffc, v8
+; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v8
+; GISEL-NEXT:    v_trunc_f32_e32 v9, v9
+; GISEL-NEXT:    v_mac_f32_e32 v8, 0xcf800000, v9
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
+; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v4
+; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
+; GISEL-NEXT:    v_mul_lo_u32 v13, v11, v8
+; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v9
+; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v8
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
+; GISEL-NEXT:    v_mul_lo_u32 v14, v9, v12
+; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v13
+; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v12
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; GISEL-NEXT:    v_mul_lo_u32 v15, v9, v13
+; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
+; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v13
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v13
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], v9, v13, vcc
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
+; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v8
+; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v8
+; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v12
+; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v8
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
+; GISEL-NEXT:    v_mul_lo_u32 v11, v12, v13
+; GISEL-NEXT:    v_mul_lo_u32 v14, v8, v10
+; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v13
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v14, v11
+; GISEL-NEXT:    v_mul_lo_u32 v14, v12, v10
+; GISEL-NEXT:    v_mul_hi_u32 v13, v12, v13
+; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v10
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v15
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
+; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v10
+; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v10, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, 0, v9, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v8
+; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v9
+; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v8
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v9
+; GISEL-NEXT:    v_mul_hi_u32 v8, v1, v8
+; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v9
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v8
+; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v8
+; GISEL-NEXT:    v_mul_lo_u32 v9, v4, v9
+; GISEL-NEXT:    v_mul_hi_u32 v8, v4, v8
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
+; GISEL-NEXT:    v_subb_u32_e64 v9, s[4:5], v1, v8, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v8
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v10, s[4:5], v0, v4
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
+; GISEL-NEXT:    v_subbrev_u32_e64 v11, vcc, 0, v1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v11, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v10, v4
+; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], v1, v5, s[4:5]
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v10, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v11, v1, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v7
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, 0, v6
+; GISEL-NEXT:    v_subb_u32_e32 v9, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v5
+; GISEL-NEXT:    v_mul_hi_u32 v13, v8, v4
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
+; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v10
+; GISEL-NEXT:    v_mul_lo_u32 v13, v4, v11
+; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v10
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; GISEL-NEXT:    v_mul_lo_u32 v13, v5, v11
+; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v10
+; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v11
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v11
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v5, v11, vcc
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v11
+; GISEL-NEXT:    v_mul_lo_u32 v11, v8, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v4
+; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v10
+; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v4
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
+; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v11
+; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
+; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v11
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
+; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
+; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v11
+; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v8
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
+; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
+; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v9
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v5, vcc, 0, v5, s[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v5
+; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v5
+; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
+; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v4
+; GISEL-NEXT:    v_mul_lo_u32 v9, v7, v4
+; GISEL-NEXT:    v_mul_lo_u32 v5, v6, v5
+; GISEL-NEXT:    v_mul_hi_u32 v4, v6, v4
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
+; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], v3, v4, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v8, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], v2, v6
+; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
+; GISEL-NEXT:    v_subbrev_u32_e64 v9, vcc, 0, v3, s[4:5]
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v8, v6
+; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v7, s[4:5]
+; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; GISEL-NEXT:    v_cndmask_b32_e32 v6, v8, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_urem_v2i64_pow2_shl_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    v_mov_b32_e32 v5, v0
+; CGP-NEXT:    v_mov_b32_e32 v7, v1
+; CGP-NEXT:    s_movk_i32 s4, 0x1000
+; CGP-NEXT:    s_mov_b32 s5, 0
+; CGP-NEXT:    v_mov_b32_e32 v0, 0
+; CGP-NEXT:    v_lshl_b64 v[10:11], s[4:5], v4
+; CGP-NEXT:    v_lshl_b64 v[8:9], s[4:5], v6
+; CGP-NEXT:    v_or_b32_e32 v1, v7, v11
+; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execnz BB8_2
+; CGP-NEXT:  ; %bb.1:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v10
+; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v11
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v10
+; CGP-NEXT:    v_subb_u32_e32 v6, vcc, 0, v11, vcc
+; CGP-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
+; CGP-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
+; CGP-NEXT:    v_trunc_f32_e32 v1, v1
+; CGP-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; CGP-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v4, v1
+; CGP-NEXT:    v_mul_lo_u32 v13, v4, v0
+; CGP-NEXT:    v_mul_lo_u32 v14, v6, v0
+; CGP-NEXT:    v_mul_hi_u32 v15, v4, v0
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
+; CGP-NEXT:    v_mul_lo_u32 v14, v1, v13
+; CGP-NEXT:    v_mul_hi_u32 v16, v0, v13
+; CGP-NEXT:    v_mul_hi_u32 v13, v1, v13
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; CGP-NEXT:    v_mul_lo_u32 v15, v0, v12
+; CGP-NEXT:    v_mul_lo_u32 v17, v1, v12
+; CGP-NEXT:    v_mul_hi_u32 v18, v0, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v1, v12
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v17, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
+; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CGP-NEXT:    v_add_i32_e32 v15, vcc, v17, v16
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
+; CGP-NEXT:    v_addc_u32_e64 v13, s[4:5], v1, v12, vcc
+; CGP-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v12
+; CGP-NEXT:    v_mul_lo_u32 v12, v4, v0
+; CGP-NEXT:    v_mul_lo_u32 v6, v6, v0
+; CGP-NEXT:    v_mul_hi_u32 v14, v4, v0
+; CGP-NEXT:    v_mul_lo_u32 v4, v4, v13
+; CGP-NEXT:    v_mul_lo_u32 v15, v13, v12
+; CGP-NEXT:    v_mul_hi_u32 v16, v0, v12
+; CGP-NEXT:    v_mul_hi_u32 v12, v13, v12
+; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v6, v4
+; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v14
+; CGP-NEXT:    v_mul_lo_u32 v6, v0, v4
+; CGP-NEXT:    v_mul_lo_u32 v14, v13, v4
+; CGP-NEXT:    v_mul_hi_u32 v17, v0, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v13, v4
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v15, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v13, v6
+; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v15
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v12, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
+; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v12
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
+; CGP-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT:    v_mul_lo_u32 v4, v7, v0
+; CGP-NEXT:    v_mul_hi_u32 v6, v5, v0
+; CGP-NEXT:    v_mul_hi_u32 v0, v7, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v5, v1
+; CGP-NEXT:    v_mul_lo_u32 v13, v7, v1
+; CGP-NEXT:    v_mul_hi_u32 v14, v5, v1
+; CGP-NEXT:    v_mul_hi_u32 v1, v7, v1
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v13, v6
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; CGP-NEXT:    v_mul_lo_u32 v6, v10, v0
+; CGP-NEXT:    v_mul_lo_u32 v12, v11, v0
+; CGP-NEXT:    v_mul_hi_u32 v0, v10, v0
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT:    v_mul_lo_u32 v1, v10, v1
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v12, v1
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v5, v6
+; CGP-NEXT:    v_subb_u32_e64 v4, s[4:5], v7, v0, vcc
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v7, v0
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CGP-NEXT:    v_subb_u32_e32 v0, vcc, v0, v11, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v11
+; CGP-NEXT:    v_cndmask_b32_e32 v6, v7, v6, vcc
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v10
+; CGP-NEXT:    v_subbrev_u32_e64 v12, s[4:5], 0, v0, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
+; CGP-NEXT:    v_subb_u32_e32 v0, vcc, v0, v11, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v12, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, -1, vcc
+; CGP-NEXT:    v_sub_i32_e32 v15, vcc, v7, v10
+; CGP-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v12, v11
+; CGP-NEXT:    v_cndmask_b32_e32 v11, v14, v13, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
+; CGP-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v11, v12, v0, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v1, v7, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v11, vcc
+; CGP-NEXT:  BB8_2: ; %Flow1
+; CGP-NEXT:    s_or_saveexec_b64 s[8:9], s[6:7]
+; CGP-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; CGP-NEXT:    s_cbranch_execz BB8_4
+; CGP-NEXT:  ; %bb.3:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v10
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
+; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; CGP-NEXT:    v_mul_lo_u32 v1, v0, v10
+; CGP-NEXT:    v_mul_hi_u32 v4, v0, v10
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, 0, v1
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; CGP-NEXT:    v_mul_hi_u32 v1, v1, v0
+; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v0, v1
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v1
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; CGP-NEXT:    v_mul_hi_u32 v0, v0, v5
+; CGP-NEXT:    v_mul_lo_u32 v0, v0, v10
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v5, v0
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v10
+; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v1, v10
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v0
+; CGP-NEXT:    v_sub_i32_e64 v0, s[6:7], v1, v10
+; CGP-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v4, v0, s[4:5]
+; CGP-NEXT:    v_mov_b32_e32 v1, 0
+; CGP-NEXT:  BB8_4:
+; CGP-NEXT:    s_or_b64 exec, exec, s[8:9]
+; CGP-NEXT:    v_or_b32_e32 v5, v3, v9
+; CGP-NEXT:    v_mov_b32_e32 v4, 0
+; CGP-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
+; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, 1
+; CGP-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
+; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
+; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CGP-NEXT:    s_cbranch_execnz BB8_6
+; CGP-NEXT:  ; %bb.5:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v8
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v9
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, 0, v8
+; CGP-NEXT:    v_subb_u32_e32 v7, vcc, 0, v9, vcc
+; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; CGP-NEXT:    v_trunc_f32_e32 v5, v5
+; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, v6, v5
+; CGP-NEXT:    v_mul_lo_u32 v11, v6, v4
+; CGP-NEXT:    v_mul_lo_u32 v12, v7, v4
+; CGP-NEXT:    v_mul_hi_u32 v13, v6, v4
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
+; CGP-NEXT:    v_mul_lo_u32 v12, v5, v11
+; CGP-NEXT:    v_mul_hi_u32 v14, v4, v11
+; CGP-NEXT:    v_mul_hi_u32 v11, v5, v11
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
+; CGP-NEXT:    v_mul_lo_u32 v13, v4, v10
+; CGP-NEXT:    v_mul_lo_u32 v15, v5, v10
+; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v5, v10
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v15, v11
+; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
+; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v14
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
+; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
+; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], v5, v10, vcc
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v10
+; CGP-NEXT:    v_mul_lo_u32 v10, v6, v4
+; CGP-NEXT:    v_mul_lo_u32 v7, v7, v4
+; CGP-NEXT:    v_mul_hi_u32 v12, v6, v4
+; CGP-NEXT:    v_mul_lo_u32 v6, v6, v11
+; CGP-NEXT:    v_mul_lo_u32 v13, v11, v10
+; CGP-NEXT:    v_mul_hi_u32 v14, v4, v10
+; CGP-NEXT:    v_mul_hi_u32 v10, v11, v10
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v12
+; CGP-NEXT:    v_mul_lo_u32 v7, v4, v6
+; CGP-NEXT:    v_mul_lo_u32 v12, v11, v6
+; CGP-NEXT:    v_mul_hi_u32 v15, v4, v6
+; CGP-NEXT:    v_mul_hi_u32 v6, v11, v6
+; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v13, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v14
+; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v15
+; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
+; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v13
+; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v10, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
+; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v6, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
+; CGP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; CGP-NEXT:    v_mul_lo_u32 v6, v3, v4
+; CGP-NEXT:    v_mul_hi_u32 v7, v2, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, v2, v5
+; CGP-NEXT:    v_mul_lo_u32 v11, v3, v5
+; CGP-NEXT:    v_mul_hi_u32 v12, v2, v5
+; CGP-NEXT:    v_mul_hi_u32 v5, v3, v5
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
+; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
+; CGP-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CGP-NEXT:    v_mul_lo_u32 v7, v8, v4
+; CGP-NEXT:    v_mul_lo_u32 v10, v9, v4
+; CGP-NEXT:    v_mul_hi_u32 v4, v8, v4
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; CGP-NEXT:    v_mul_lo_u32 v5, v8, v5
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
+; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v2, v7
+; CGP-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v9
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v5, v8
+; CGP-NEXT:    v_subbrev_u32_e64 v10, s[4:5], 0, v3, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
+; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v9
+; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; CGP-NEXT:    v_sub_i32_e32 v13, vcc, v7, v8
+; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; CGP-NEXT:    v_cndmask_b32_e32 v9, v12, v11, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
+; CGP-NEXT:    v_cndmask_b32_e32 v7, v7, v13, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v10, v3, vcc
+; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v5, v7, vcc
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v6, v3, vcc
+; CGP-NEXT:  BB8_6: ; %Flow
+; CGP-NEXT:    s_or_saveexec_b64 s[8:9], s[6:7]
+; CGP-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; CGP-NEXT:    s_cbranch_execz BB8_8
+; CGP-NEXT:  ; %bb.7:
+; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v8
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CGP-NEXT:    v_mul_lo_u32 v4, v3, v8
+; CGP-NEXT:    v_mul_hi_u32 v5, v3, v8
+; CGP-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; CGP-NEXT:    v_mul_hi_u32 v4, v4, v3
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; CGP-NEXT:    v_mul_hi_u32 v3, v3, v2
+; CGP-NEXT:    v_mul_lo_u32 v3, v3, v8
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v2, v3
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v8
+; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v4, v8
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v3
+; CGP-NEXT:    v_sub_i32_e64 v2, s[6:7], v4, v8
+; CGP-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v4, v5, v2, s[4:5]
+; CGP-NEXT:    v_mov_b32_e32 v5, 0
+; CGP-NEXT:  BB8_8:
+; CGP-NEXT:    s_or_b64 exec, exec, s[8:9]
+; CGP-NEXT:    v_mov_b32_e32 v2, v4
+; CGP-NEXT:    v_mov_b32_e32 v3, v5
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
+  %r = urem <2 x i64> %x, %shl.y
+  ret <2 x i64> %r
+}
+
+define i64 @v_urem_i64_24bit(i64 %num, i64 %den) {
+; GISEL-LABEL: v_urem_i64_24bit:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
+; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
+; GISEL-NEXT:    v_and_b32_e32 v1, s4, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v2
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GISEL-NEXT:    v_mul_lo_u32 v3, v2, v1
+; GISEL-NEXT:    v_mul_hi_u32 v4, v2, v1
+; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v3
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v3, v3, v2
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v2, v3
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v2, v2, v0
+; GISEL-NEXT:    v_mul_lo_u32 v2, v2, v1
+; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v2
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v1
+; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v3, v1
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v2
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[6:7], v3, v1
+; GISEL-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v4, v0, s[4:5]
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_urem_i64_24bit:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_mov_b32 s4, 0xffffff
+; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
+; CGP-NEXT:    v_and_b32_e32 v1, s4, v2
+; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v1
+; CGP-NEXT:    v_rcp_f32_e32 v4, v3
+; CGP-NEXT:    v_mul_f32_e32 v4, v2, v4
+; CGP-NEXT:    v_trunc_f32_e32 v4, v4
+; CGP-NEXT:    v_mad_f32 v2, -v4, v3, v2
+; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v2|, v3
+; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
+; CGP-NEXT:    v_mul_lo_u32 v1, v2, v1
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
+; CGP-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
+; CGP-NEXT:    v_mov_b32_e32 v1, 0
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %num.mask = and i64 %num, 16777215
+  %den.mask = and i64 %den, 16777215
+  %result = urem i64 %num.mask, %den.mask
+  ret i64 %result
+}
+
+define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
+; GISEL-LABEL: v_urem_v2i64_24bit:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s6, 0xffffff
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v1, 0
+; GISEL-NEXT:    v_and_b32_e32 v3, s6, v4
+; GISEL-NEXT:    v_and_b32_e32 v4, s6, v6
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v3
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, 0, v3
+; GISEL-NEXT:    v_subb_u32_e64 v7, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v4
+; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v4
+; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], 0, 0, vcc
+; GISEL-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v1
+; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v1
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v1, v5
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v8
+; GISEL-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; GISEL-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v1
+; GISEL-NEXT:    v_mul_f32_e32 v11, 0x2f800000, v5
+; GISEL-NEXT:    v_trunc_f32_e32 v8, v8
+; GISEL-NEXT:    v_trunc_f32_e32 v11, v11
+; GISEL-NEXT:    v_mac_f32_e32 v1, 0xcf800000, v8
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v11
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v11, v11
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GISEL-NEXT:    v_mul_lo_u32 v12, v6, v8
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v11
+; GISEL-NEXT:    v_mul_lo_u32 v14, v6, v1
+; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v1
+; GISEL-NEXT:    v_mul_hi_u32 v16, v6, v1
+; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v5
+; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v5
+; GISEL-NEXT:    v_mul_hi_u32 v19, v9, v5
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v18, v13
+; GISEL-NEXT:    v_mul_lo_u32 v15, v11, v17
+; GISEL-NEXT:    v_mul_hi_u32 v18, v5, v17
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v19
+; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v13
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v19
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v18
+; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v14
+; GISEL-NEXT:    v_mul_hi_u32 v18, v1, v14
+; GISEL-NEXT:    v_mul_hi_u32 v14, v8, v14
+; GISEL-NEXT:    v_mul_hi_u32 v17, v11, v17
+; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v16
+; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v12
+; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v18
+; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v18
+; GISEL-NEXT:    v_mul_hi_u32 v18, v1, v12
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v15, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v18
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v18
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v19, v18
+; GISEL-NEXT:    v_mul_lo_u32 v19, v11, v13
+; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v19, v17
+; GISEL-NEXT:    v_mul_hi_u32 v19, v5, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v19
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v20, v19
+; GISEL-NEXT:    v_and_b32_e32 v0, s6, v0
+; GISEL-NEXT:    v_and_b32_e32 v2, s6, v2
+; GISEL-NEXT:    v_mul_hi_u32 v12, v8, v12
+; GISEL-NEXT:    v_mul_hi_u32 v13, v11, v13
+; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v18
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
+; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v18
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
+; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v14
+; GISEL-NEXT:    v_addc_u32_e64 v14, s[4:5], v8, v12, vcc
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v12
+; GISEL-NEXT:    v_mul_lo_u32 v12, v6, v1
+; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v1
+; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v1
+; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v17
+; GISEL-NEXT:    v_addc_u32_e64 v16, s[6:7], v11, v13, s[4:5]
+; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v13
+; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v5
+; GISEL-NEXT:    v_mul_lo_u32 v10, v10, v5
+; GISEL-NEXT:    v_mul_hi_u32 v17, v9, v5
+; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v14
+; GISEL-NEXT:    v_mul_lo_u32 v18, v14, v12
+; GISEL-NEXT:    v_mul_hi_u32 v19, v1, v12
+; GISEL-NEXT:    v_mul_hi_u32 v12, v14, v12
+; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v16
+; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v7, v6
+; GISEL-NEXT:    v_mul_lo_u32 v7, v16, v13
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v10, v9
+; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v13
+; GISEL-NEXT:    v_mul_hi_u32 v13, v16, v13
+; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v15
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v17
+; GISEL-NEXT:    v_mul_lo_u32 v15, v1, v6
+; GISEL-NEXT:    v_mul_lo_u32 v17, v5, v9
+; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v10
+; GISEL-NEXT:    v_mul_lo_u32 v7, v14, v6
+; GISEL-NEXT:    v_mul_hi_u32 v10, v1, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, v14, v6
+; GISEL-NEXT:    v_mul_lo_u32 v14, v16, v9
+; GISEL-NEXT:    v_mul_hi_u32 v16, v16, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
+; GISEL-NEXT:    v_add_i32_e64 v15, s[8:9], v18, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v7, s[8:9], v7, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v13, s[8:9], v14, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[8:9], v15, v19
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
+; GISEL-NEXT:    v_add_i32_e64 v7, s[8:9], v7, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v13, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v18, v15
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v12, v10
+; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v17, v19
+; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], v14, v13
+; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v14
+; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v13, v12
+; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v10
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v16, v12
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v8, v6, vcc
+; GISEL-NEXT:    v_addc_u32_e64 v8, vcc, v11, v10, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v7
+; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v7, 0, v1
+; GISEL-NEXT:    v_mul_hi_u32 v10, v0, v1
+; GISEL-NEXT:    v_mul_hi_u32 v1, 0, v1
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, 0, v8, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v9, 0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v11, v2, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, 0, v5
+; GISEL-NEXT:    v_mul_lo_u32 v12, v0, v6
+; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v6
+; GISEL-NEXT:    v_mul_hi_u32 v14, v0, v6
+; GISEL-NEXT:    v_mul_hi_u32 v6, 0, v6
+; GISEL-NEXT:    v_mul_lo_u32 v15, v2, v8
+; GISEL-NEXT:    v_mul_lo_u32 v16, 0, v8
+; GISEL-NEXT:    v_mul_hi_u32 v17, v2, v8
+; GISEL-NEXT:    v_mul_hi_u32 v8, 0, v8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v14
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v12, v7
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
+; GISEL-NEXT:    v_mul_lo_u32 v10, v3, v1
+; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v1
+; GISEL-NEXT:    v_mul_hi_u32 v1, v3, v1
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; GISEL-NEXT:    v_mul_lo_u32 v11, v4, v5
+; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v5
+; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
+; GISEL-NEXT:    v_mul_lo_u32 v6, v3, v6
+; GISEL-NEXT:    v_mul_lo_u32 v7, v4, v7
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v6, v1
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
+; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], 0, v1, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], 0, v1
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v11
+; GISEL-NEXT:    v_subb_u32_e64 v8, s[6:7], 0, v5, s[4:5]
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[6:7], 0, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v2, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[6:7]
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[6:7]
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
+; GISEL-NEXT:    v_subbrev_u32_e64 v5, vcc, 0, v5, s[4:5]
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v7, v10, v7, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, v0, v3
+; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v3
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v8
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, v11, v9, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, v2, v4
+; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, -1, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v10, v3
+; GISEL-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v1, vcc
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v5
+; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v11, v4
+; GISEL-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; GISEL-NEXT:    v_cndmask_b32_e32 v12, v14, v12, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; GISEL-NEXT:    v_cndmask_b32_e32 v13, v16, v13, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v10, v3, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v13
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v11, v4, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v5, v17, s[4:5]
+; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v4, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, v8, v3, s[4:5]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_urem_v2i64_24bit:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_mov_b32 s6, 0xffffff
+; CGP-NEXT:    v_mov_b32_e32 v1, 0
+; CGP-NEXT:    v_and_b32_e32 v0, s6, v0
+; CGP-NEXT:    v_and_b32_e32 v2, s6, v2
+; CGP-NEXT:    v_and_b32_e32 v3, s6, v4
+; CGP-NEXT:    v_and_b32_e32 v4, s6, v6
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v0
+; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
+; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v2
+; CGP-NEXT:    v_cvt_f32_u32_e32 v8, v4
+; CGP-NEXT:    v_rcp_f32_e32 v9, v6
+; CGP-NEXT:    v_rcp_f32_e32 v10, v8
+; CGP-NEXT:    v_mul_f32_e32 v9, v5, v9
+; CGP-NEXT:    v_mul_f32_e32 v10, v7, v10
+; CGP-NEXT:    v_trunc_f32_e32 v9, v9
+; CGP-NEXT:    v_trunc_f32_e32 v10, v10
+; CGP-NEXT:    v_mad_f32 v5, -v9, v6, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v9, v9
+; CGP-NEXT:    v_mad_f32 v7, -v10, v8, v7
+; CGP-NEXT:    v_cvt_u32_f32_e32 v10, v10
+; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v5|, v6
+; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s[4:5]
+; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v7|, v8
+; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
+; CGP-NEXT:    v_mul_lo_u32 v3, v5, v3
+; CGP-NEXT:    v_mul_lo_u32 v4, v6, v4
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
+; CGP-NEXT:    v_and_b32_e32 v0, s6, v0
+; CGP-NEXT:    v_and_b32_e32 v2, s6, v2
+; CGP-NEXT:    v_mov_b32_e32 v3, v1
+; CGP-NEXT:    s_setpc_b64 s[30:31]
+  %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>
+  %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215>
+  %result = urem <2 x i64> %num.mask, %den.mask
+  ret <2 x i64> %result
+}


        


More information about the llvm-commits mailing list