[llvm] 8bfc0e0 - [GlobalISel] Port the udiv -> mul by constant combine.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 7 11:37:27 PDT 2021


Author: Amara Emerson
Date: 2021-10-07T11:37:17-07:00
New Revision: 8bfc0e06dc85663ba3317da0c7b472260bf27948

URL: https://github.com/llvm/llvm-project/commit/8bfc0e06dc85663ba3317da0c7b472260bf27948
DIFF: https://github.com/llvm/llvm-project/commit/8bfc0e06dc85663ba3317da0c7b472260bf27948.diff

LOG: [GlobalISel] Port the udiv -> mul by constant combine.

This is a straight port from the equivalent DAG combine.

Differential Revision: https://reviews.llvm.org/D110890
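
For illustration (not part of this patch): the expansion for a 16-bit
divide by 23, the combine_vec_udiv_uniform case in the tests below, is a
G_UMULH by the magic constant 25645, a fixup on (x - q), and a final
shift by 4. A minimal standalone C++ model of that sequence, using only
the constants visible in the test CHECK lines:

    #include <cassert>
    #include <cstdint>

    // q = umulh(x, 25645); npq = (x - q) >> 1; result = (q + npq) >> 4
    static uint16_t DivBy23(uint16_t X) {
      uint16_t Q = uint16_t((uint32_t(X) * 25645u) >> 16); // G_UMULH by magic
      uint16_t NPQ = uint16_t(X - Q) >> 1;                 // NPQ fixup
      return uint16_t((Q + NPQ) >> 4);                     // post-shift
    }

    int main() {
      for (uint32_t X = 0; X <= 0xFFFF; ++X) // exhaustive over u16
        assert(DivBy23(uint16_t(X)) == X / 23);
      return 0;
    }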

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
    llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.mir

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    llvm/include/llvm/CodeGen/GlobalISel/Utils.h
    llvm/include/llvm/Target/GlobalISel/Combine.td
    llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    llvm/lib/CodeGen/GlobalISel/Utils.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 987e930e6d11a..a8d06acd186f8 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -602,6 +602,14 @@ class CombinerHelper {
   /// feeding a G_AND instruction \p MI.
   bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo);
 
+  /// Given a G_UDIV \p MI expressing a divide by constant, return an
+  /// expression that implements it by multiplying by a magic number.
+  /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
+  MachineInstr *buildUDivUsingMul(MachineInstr &MI);
+  /// Combine G_UDIV by constant into a multiply by magic constant.
+  bool matchUDivByConst(MachineInstr &MI);
+  void applyUDivByConst(MachineInstr &MI);
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 39a5ee71c7102..d0d6ca17b9483 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -397,6 +397,11 @@ bool isBuildVectorAllOnes(const MachineInstr &MI,
 Optional<RegOrConstant> getVectorSplat(const MachineInstr &MI,
                                        const MachineRegisterInfo &MRI);
 
+/// Determines if \p MI defines a constant integer or a build vector of
+/// constant integers. Treats undef values as constants.
+bool isConstantOrConstantVector(MachineInstr &MI,
+                                const MachineRegisterInfo &MRI);
+
 /// Determines if \p MI defines a constant integer or a splat vector of
 /// constant integers.
 /// \returns the scalar constant or None.

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 52caf3f9ee609..17256d3bc95dd 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -694,6 +694,15 @@ def bitfield_extract_from_shr : GICombineRule<
 def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
                                             bitfield_extract_from_and,
                                             bitfield_extract_from_shr]>;
+
+def udiv_by_const : GICombineRule<
+  (defs root:$root),
+  (match (wip_match_opcode G_UDIV):$root,
+   [{ return Helper.matchUDivByConst(*${root}); }]),
+  (apply [{ Helper.applyUDivByConst(*${root}); }])>;
+
+def intdiv_combines : GICombineGroup<[udiv_by_const]>;
+
 def reassoc_ptradd : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$matchinfo),
   (match (wip_match_opcode G_PTR_ADD):$root,
@@ -761,7 +770,8 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
     shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
     truncstore_merge, div_rem_to_divrem, funnel_shift_combines,
-    form_bitfield_extract, constant_fold, fabs_fneg_fold]>;
+    form_bitfield_extract, constant_fold, fabs_fneg_fold,
+    intdiv_combines]>;
 
 // A combine group used for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 29a16bc765e8e..e60f2c34b2d2b 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -30,6 +30,7 @@
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/DivisionByConstantInfo.h"
 #include "llvm/Support/MathExtras.h"
 #include <tuple>
 
@@ -4422,6 +4423,162 @@ bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
   return true;
 }
 
+MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_UDIV);
+  auto &UDiv = cast<GenericMachineInstr>(MI);
+  Register Dst = UDiv.getReg(0);
+  Register LHS = UDiv.getReg(1);
+  Register RHS = UDiv.getReg(2);
+  LLT Ty = MRI.getType(Dst);
+  LLT ScalarTy = Ty.getScalarType();
+  const unsigned EltBits = ScalarTy.getScalarSizeInBits();
+  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+  LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
+  auto &MIB = Builder;
+  MIB.setInstrAndDebugLoc(MI);
+
+  bool UseNPQ = false;
+  SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
+
+  auto BuildUDIVPattern = [&](const Constant *C) {
+    auto *CI = cast<ConstantInt>(C);
+    const APInt &Divisor = CI->getValue();
+    UnsignedDivisonByConstantInfo magics =
+        UnsignedDivisonByConstantInfo::get(Divisor);
+    unsigned PreShift = 0, PostShift = 0;
+
+    // If the divisor is even, we can avoid using the expensive fixup by
+    // shifting the divided value upfront.
+    if (magics.IsAdd != 0 && !Divisor[0]) {
+      PreShift = Divisor.countTrailingZeros();
+      // Get magic number for the shifted divisor.
+      magics =
+          UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
+      assert(magics.IsAdd == 0 && "Should use cheap fixup now");
+    }
+
+    APInt Magic = magics.Magic;
+
+    bool SelNPQ;
+    if (magics.IsAdd == 0 || Divisor.isOneValue()) {
+      assert(magics.ShiftAmount < Divisor.getBitWidth() &&
+             "We shouldn't generate an undefined shift!");
+      PostShift = magics.ShiftAmount;
+      SelNPQ = false;
+    } else {
+      PostShift = magics.ShiftAmount - 1;
+      SelNPQ = true;
+    }
+
+    PreShifts.push_back(
+        MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
+    MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
+    NPQFactors.push_back(
+        MIB.buildConstant(ScalarTy,
+                          SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
+                                 : APInt::getZero(EltBits))
+            .getReg(0));
+    PostShifts.push_back(
+        MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
+    UseNPQ |= SelNPQ;
+    return true;
+  };
+
+  // Collect the shifts/magic values from each element.
+  bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
+  (void)Matched;
+  assert(Matched && "Expected unary predicate match to succeed");
+
+  Register PreShift, PostShift, MagicFactor, NPQFactor;
+  auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
+  if (RHSDef) {
+    PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
+    MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
+    NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
+    PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
+  } else {
+    assert(MRI.getType(RHS).isScalar() &&
+           "Non-build_vector operation should have been a scalar");
+    PreShift = PreShifts[0];
+    MagicFactor = MagicFactors[0];
+    PostShift = PostShifts[0];
+  }
+
+  Register Q = LHS;
+  Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
+
+  // Multiply the numerator (operand 0) by the magic value.
+  Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
+
+  if (UseNPQ) {
+    Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
+
+    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
+    // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
+    if (Ty.isVector())
+      NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
+    else
+      NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
+
+    Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
+  }
+
+  Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
+  auto One = MIB.buildConstant(Ty, 1);
+  auto IsOne = MIB.buildICmp(
+      CmpInst::Predicate::ICMP_EQ,
+      Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
+  return MIB.buildSelect(Ty, IsOne, LHS, Q);
+}
+
+bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_UDIV);
+  Register Dst = MI.getOperand(0).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  auto *RHSDef = MRI.getVRegDef(RHS);
+  if (!isConstantOrConstantVector(*RHSDef, MRI))
+    return false;
+
+  auto &MF = *MI.getMF();
+  AttributeList Attr = MF.getFunction().getAttributes();
+  const auto &TLI = getTargetLowering();
+  LLVMContext &Ctx = MF.getFunction().getContext();
+  auto &DL = MF.getDataLayout();
+  if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
+    return false;
+
+  // Don't do this for minsize because the instruction sequence is usually
+  // larger.
+  if (MF.getFunction().hasMinSize())
+    return false;
+
+  // Don't do this if the types are not going to be legal.
+  if (LI) {
+    if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
+      return false;
+    if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
+      return false;
+    if (!isLegalOrBeforeLegalizer(
+            {TargetOpcode::G_ICMP,
+             {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
+              DstTy}}))
+      return false;
+  }
+
+  auto CheckEltValue = [&](const Constant *C) {
+    if (auto *CI = dyn_cast_or_null<ConstantInt>(C))
+      return !CI->isZero();
+    return false;
+  };
+  return matchUnaryPredicate(MRI, RHS, CheckEltValue);
+}
+
+void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
+  auto *NewMI = buildUDivUsingMul(MI);
+  replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;

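Aside (illustration only, not part of the patch): the even-divisor
pre-shift path above is what produces the constants in the
udiv_by_scalar_const MIR test added below. x / 42 becomes
umulh(x >> 1, 818089009) >> 2: a pre-shift by countTrailingZeros(42) = 1,
the magic multiply for the shifted divisor 21, then a post-shift by 2,
with no NPQ fixup needed. A standalone C++ spot check:

    #include <cassert>
    #include <cstdint>

    static uint32_t DivBy42(uint32_t X) {
      uint32_t Q = X >> 1;                            // G_LSHR pre-shift
      Q = uint32_t((uint64_t(Q) * 818089009u) >> 32); // G_UMULH by magic
      return Q >> 2;                                  // G_LSHR post-shift
    }

    int main() {
      const uint32_t Samples[] = {0u, 1u, 41u, 42u, 43u, 1u << 20,
                                  0x7FFFFFFFu, 0xFFFFFFFEu, 0xFFFFFFFFu};
      for (uint32_t X : Samples)
        assert(DivBy42(X) == X / 42);
      return 0;
    }
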
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 4223a0d1d73d8..f01df66249e7f 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1016,6 +1016,23 @@ Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
   return RegOrConstant(Reg);
 }
 
+bool llvm::isConstantOrConstantVector(MachineInstr &MI,
+                                      const MachineRegisterInfo &MRI) {
+  Register Def = MI.getOperand(0).getReg();
+  if (auto C = getIConstantVRegValWithLookThrough(Def, MRI))
+    return true;
+  GBuildVector *BV = dyn_cast<GBuildVector>(&MI);
+  if (!BV)
+    return false;
+  for (unsigned SrcIdx = 0; SrcIdx < BV->getNumSources(); ++SrcIdx) {
+    if (getIConstantVRegValWithLookThrough(BV->getSourceReg(SrcIdx), MRI) ||
+        getOpcodeDef<GImplicitDef>(BV->getSourceReg(SrcIdx), MRI))
+      continue;
+    return false;
+  }
+  return true;
+}
+
 Optional<APInt>
 llvm::isConstantOrConstantSplatVector(MachineInstr &MI,
                                       const MachineRegisterInfo &MRI) {

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
new file mode 100644
index 0000000000000..9b8597943f3de
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
@@ -0,0 +1,287 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefixes=SDAG
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=GISEL
+
+; These tests are taken from the combine-udiv.ll in X86.
+define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
+; SDAG-LABEL: combine_vec_udiv_uniform:
+; SDAG:       // %bb.0:
+; SDAG-NEXT:    mov w8, #25645
+; SDAG-NEXT:    dup v1.8h, w8
+; SDAG-NEXT:    umull2 v2.4s, v0.8h, v1.8h
+; SDAG-NEXT:    umull v1.4s, v0.4h, v1.4h
+; SDAG-NEXT:    uzp2 v1.8h, v1.8h, v2.8h
+; SDAG-NEXT:    sub v0.8h, v0.8h, v1.8h
+; SDAG-NEXT:    usra v1.8h, v0.8h, #1
+; SDAG-NEXT:    ushr v0.8h, v1.8h, #4
+; SDAG-NEXT:    ret
+;
+; GISEL-LABEL: combine_vec_udiv_uniform:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    adrp x8, .LCPI0_1
+; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI0_1]
+; GISEL-NEXT:    adrp x8, .LCPI0_0
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI0_0]
+; GISEL-NEXT:    umull2 v3.4s, v0.8h, v1.8h
+; GISEL-NEXT:    umull v1.4s, v0.4h, v1.4h
+; GISEL-NEXT:    uzp2 v1.8h, v1.8h, v3.8h
+; GISEL-NEXT:    sub v0.8h, v0.8h, v1.8h
+; GISEL-NEXT:    umull2 v3.4s, v0.8h, v2.8h
+; GISEL-NEXT:    umull v0.4s, v0.4h, v2.4h
+; GISEL-NEXT:    uzp2 v0.8h, v0.8h, v3.8h
+; GISEL-NEXT:    add v0.8h, v0.8h, v1.8h
+; GISEL-NEXT:    ushr v0.8h, v0.8h, #4
+; GISEL-NEXT:    ret
+  %1 = udiv <8 x i16> %x, <i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23>
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
+; SDAG-LABEL: combine_vec_udiv_nonuniform:
+; SDAG:       // %bb.0:
+; SDAG-NEXT:    adrp x8, .LCPI1_0
+; SDAG-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
+; SDAG-NEXT:    adrp x8, .LCPI1_1
+; SDAG-NEXT:    ldr q2, [x8, :lo12:.LCPI1_1]
+; SDAG-NEXT:    adrp x8, .LCPI1_2
+; SDAG-NEXT:    ldr q3, [x8, :lo12:.LCPI1_2]
+; SDAG-NEXT:    ushl v1.8h, v0.8h, v1.8h
+; SDAG-NEXT:    umull2 v4.4s, v1.8h, v2.8h
+; SDAG-NEXT:    umull v1.4s, v1.4h, v2.4h
+; SDAG-NEXT:    adrp x8, .LCPI1_3
+; SDAG-NEXT:    uzp2 v1.8h, v1.8h, v4.8h
+; SDAG-NEXT:    ldr q2, [x8, :lo12:.LCPI1_3]
+; SDAG-NEXT:    sub v0.8h, v0.8h, v1.8h
+; SDAG-NEXT:    umull2 v4.4s, v0.8h, v3.8h
+; SDAG-NEXT:    umull v0.4s, v0.4h, v3.4h
+; SDAG-NEXT:    uzp2 v0.8h, v0.8h, v4.8h
+; SDAG-NEXT:    add v0.8h, v0.8h, v1.8h
+; SDAG-NEXT:    ushl v0.8h, v0.8h, v2.8h
+; SDAG-NEXT:    ret
+;
+; GISEL-LABEL: combine_vec_udiv_nonuniform:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    adrp x8, .LCPI1_5
+; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI1_5]
+; GISEL-NEXT:    adrp x8, .LCPI1_4
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI1_4]
+; GISEL-NEXT:    adrp x8, .LCPI1_3
+; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI1_3]
+; GISEL-NEXT:    adrp x8, .LCPI1_1
+; GISEL-NEXT:    ldr q4, [x8, :lo12:.LCPI1_1]
+; GISEL-NEXT:    adrp x8, .LCPI1_0
+; GISEL-NEXT:    ldr q5, [x8, :lo12:.LCPI1_0]
+; GISEL-NEXT:    adrp x8, .LCPI1_2
+; GISEL-NEXT:    neg v2.8h, v2.8h
+; GISEL-NEXT:    ldr q6, [x8, :lo12:.LCPI1_2]
+; GISEL-NEXT:    ushl v2.8h, v0.8h, v2.8h
+; GISEL-NEXT:    cmeq v1.8h, v1.8h, v5.8h
+; GISEL-NEXT:    umull2 v5.4s, v2.8h, v3.8h
+; GISEL-NEXT:    umull v2.4s, v2.4h, v3.4h
+; GISEL-NEXT:    uzp2 v2.8h, v2.8h, v5.8h
+; GISEL-NEXT:    sub v3.8h, v0.8h, v2.8h
+; GISEL-NEXT:    umull2 v5.4s, v3.8h, v6.8h
+; GISEL-NEXT:    umull v3.4s, v3.4h, v6.4h
+; GISEL-NEXT:    uzp2 v3.8h, v3.8h, v5.8h
+; GISEL-NEXT:    neg v4.8h, v4.8h
+; GISEL-NEXT:    shl v1.8h, v1.8h, #15
+; GISEL-NEXT:    add v2.8h, v3.8h, v2.8h
+; GISEL-NEXT:    ushl v2.8h, v2.8h, v4.8h
+; GISEL-NEXT:    sshr v1.8h, v1.8h, #15
+; GISEL-NEXT:    bif v0.16b, v2.16b, v1.16b
+; GISEL-NEXT:    ret
+  %1 = udiv <8 x i16> %x, <i16 23, i16 34, i16 -23, i16 56, i16 128, i16 -1, i16 -256, i16 -32768>
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
+; SDAG-LABEL: combine_vec_udiv_nonuniform2:
+; SDAG:       // %bb.0:
+; SDAG-NEXT:    adrp x8, .LCPI2_0
+; SDAG-NEXT:    adrp x9, .LCPI2_1
+; SDAG-NEXT:    ldr q1, [x8, :lo12:.LCPI2_0]
+; SDAG-NEXT:    ldr q2, [x9, :lo12:.LCPI2_1]
+; SDAG-NEXT:    adrp x8, .LCPI2_2
+; SDAG-NEXT:    ldr q3, [x8, :lo12:.LCPI2_2]
+; SDAG-NEXT:    ushl v0.8h, v0.8h, v1.8h
+; SDAG-NEXT:    umull2 v1.4s, v0.8h, v2.8h
+; SDAG-NEXT:    umull v0.4s, v0.4h, v2.4h
+; SDAG-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
+; SDAG-NEXT:    ushl v0.8h, v0.8h, v3.8h
+; SDAG-NEXT:    ret
+;
+; GISEL-LABEL: combine_vec_udiv_nonuniform2:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    adrp x8, .LCPI2_4
+; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI2_4]
+; GISEL-NEXT:    adrp x8, .LCPI2_3
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI2_3]
+; GISEL-NEXT:    adrp x8, .LCPI2_1
+; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI2_1]
+; GISEL-NEXT:    adrp x8, .LCPI2_0
+; GISEL-NEXT:    ldr q4, [x8, :lo12:.LCPI2_0]
+; GISEL-NEXT:    adrp x8, .LCPI2_2
+; GISEL-NEXT:    ldr q5, [x8, :lo12:.LCPI2_2]
+; GISEL-NEXT:    neg v2.8h, v2.8h
+; GISEL-NEXT:    ushl v2.8h, v0.8h, v2.8h
+; GISEL-NEXT:    cmeq v1.8h, v1.8h, v4.8h
+; GISEL-NEXT:    umull2 v4.4s, v2.8h, v5.8h
+; GISEL-NEXT:    umull v2.4s, v2.4h, v5.4h
+; GISEL-NEXT:    neg v3.8h, v3.8h
+; GISEL-NEXT:    shl v1.8h, v1.8h, #15
+; GISEL-NEXT:    uzp2 v2.8h, v2.8h, v4.8h
+; GISEL-NEXT:    ushl v2.8h, v2.8h, v3.8h
+; GISEL-NEXT:    sshr v1.8h, v1.8h, #15
+; GISEL-NEXT:    bif v0.16b, v2.16b, v1.16b
+; GISEL-NEXT:    ret
+  %1 = udiv <8 x i16> %x, <i16 -34, i16 35, i16 36, i16 -37, i16 38, i16 -39, i16 40, i16 -41>
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
+; SDAG-LABEL: combine_vec_udiv_nonuniform3:
+; SDAG:       // %bb.0:
+; SDAG-NEXT:    adrp x8, .LCPI3_0
+; SDAG-NEXT:    ldr q1, [x8, :lo12:.LCPI3_0]
+; SDAG-NEXT:    adrp x8, .LCPI3_1
+; SDAG-NEXT:    ldr q3, [x8, :lo12:.LCPI3_1]
+; SDAG-NEXT:    umull2 v2.4s, v0.8h, v1.8h
+; SDAG-NEXT:    umull v1.4s, v0.4h, v1.4h
+; SDAG-NEXT:    uzp2 v1.8h, v1.8h, v2.8h
+; SDAG-NEXT:    sub v0.8h, v0.8h, v1.8h
+; SDAG-NEXT:    usra v1.8h, v0.8h, #1
+; SDAG-NEXT:    ushl v0.8h, v1.8h, v3.8h
+; SDAG-NEXT:    ret
+;
+; GISEL-LABEL: combine_vec_udiv_nonuniform3:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    adrp x8, .LCPI3_4
+; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI3_4]
+; GISEL-NEXT:    adrp x8, .LCPI3_3
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI3_3]
+; GISEL-NEXT:    adrp x8, .LCPI3_2
+; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI3_2]
+; GISEL-NEXT:    adrp x8, .LCPI3_1
+; GISEL-NEXT:    ldr q4, [x8, :lo12:.LCPI3_1]
+; GISEL-NEXT:    adrp x8, .LCPI3_0
+; GISEL-NEXT:    ldr q5, [x8, :lo12:.LCPI3_0]
+; GISEL-NEXT:    umull2 v6.4s, v0.8h, v2.8h
+; GISEL-NEXT:    umull v2.4s, v0.4h, v2.4h
+; GISEL-NEXT:    uzp2 v2.8h, v2.8h, v6.8h
+; GISEL-NEXT:    cmeq v1.8h, v1.8h, v5.8h
+; GISEL-NEXT:    sub v5.8h, v0.8h, v2.8h
+; GISEL-NEXT:    umull2 v6.4s, v5.8h, v3.8h
+; GISEL-NEXT:    umull v3.4s, v5.4h, v3.4h
+; GISEL-NEXT:    uzp2 v3.8h, v3.8h, v6.8h
+; GISEL-NEXT:    neg v4.8h, v4.8h
+; GISEL-NEXT:    shl v1.8h, v1.8h, #15
+; GISEL-NEXT:    add v2.8h, v3.8h, v2.8h
+; GISEL-NEXT:    ushl v2.8h, v2.8h, v4.8h
+; GISEL-NEXT:    sshr v1.8h, v1.8h, #15
+; GISEL-NEXT:    bif v0.16b, v2.16b, v1.16b
+; GISEL-NEXT:    ret
+  %1 = udiv <8 x i16> %x, <i16 7, i16 23, i16 25, i16 27, i16 31, i16 47, i16 63, i16 127>
+  ret <8 x i16> %1
+}
+
+define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
+; SDAG-LABEL: combine_vec_udiv_nonuniform4:
+; SDAG:       // %bb.0:
+; SDAG-NEXT:    adrp x8, .LCPI4_0
+; SDAG-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
+; SDAG-NEXT:    adrp x8, .LCPI4_1
+; SDAG-NEXT:    ldr q2, [x8, :lo12:.LCPI4_1]
+; SDAG-NEXT:    adrp x8, .LCPI4_2
+; SDAG-NEXT:    ldr q3, [x8, :lo12:.LCPI4_2]
+; SDAG-NEXT:    adrp x8, .LCPI4_3
+; SDAG-NEXT:    ldr q4, [x8, :lo12:.LCPI4_3]
+; SDAG-NEXT:    umull2 v5.8h, v0.16b, v1.16b
+; SDAG-NEXT:    umull v1.8h, v0.8b, v1.8b
+; SDAG-NEXT:    uzp2 v1.16b, v1.16b, v5.16b
+; SDAG-NEXT:    ushl v1.16b, v1.16b, v2.16b
+; SDAG-NEXT:    and v1.16b, v1.16b, v3.16b
+; SDAG-NEXT:    and v0.16b, v0.16b, v4.16b
+; SDAG-NEXT:    orr v0.16b, v0.16b, v1.16b
+; SDAG-NEXT:    ret
+;
+; GISEL-LABEL: combine_vec_udiv_nonuniform4:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    adrp x8, .LCPI4_3
+; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI4_3]
+; GISEL-NEXT:    adrp x8, .LCPI4_0
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI4_0]
+; GISEL-NEXT:    adrp x8, .LCPI4_2
+; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI4_2]
+; GISEL-NEXT:    adrp x8, .LCPI4_1
+; GISEL-NEXT:    ldr q4, [x8, :lo12:.LCPI4_1]
+; GISEL-NEXT:    cmeq v1.16b, v1.16b, v2.16b
+; GISEL-NEXT:    umull2 v2.8h, v0.16b, v3.16b
+; GISEL-NEXT:    umull v3.8h, v0.8b, v3.8b
+; GISEL-NEXT:    neg v4.16b, v4.16b
+; GISEL-NEXT:    uzp2 v2.16b, v3.16b, v2.16b
+; GISEL-NEXT:    shl v1.16b, v1.16b, #7
+; GISEL-NEXT:    ushl v2.16b, v2.16b, v4.16b
+; GISEL-NEXT:    sshr v1.16b, v1.16b, #7
+; GISEL-NEXT:    bif v0.16b, v2.16b, v1.16b
+; GISEL-NEXT:    ret
+  %div = udiv <16 x i8> %x, <i8 -64, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %div
+}
+
+define <8 x i16> @pr38477(<8 x i16> %a0) {
+; SDAG-LABEL: pr38477:
+; SDAG:       // %bb.0:
+; SDAG-NEXT:    adrp x8, .LCPI5_0
+; SDAG-NEXT:    ldr q1, [x8, :lo12:.LCPI5_0]
+; SDAG-NEXT:    adrp x8, .LCPI5_1
+; SDAG-NEXT:    ldr q2, [x8, :lo12:.LCPI5_1]
+; SDAG-NEXT:    adrp x8, .LCPI5_2
+; SDAG-NEXT:    umull2 v4.4s, v0.8h, v1.8h
+; SDAG-NEXT:    umull v1.4s, v0.4h, v1.4h
+; SDAG-NEXT:    uzp2 v1.8h, v1.8h, v4.8h
+; SDAG-NEXT:    ldr q3, [x8, :lo12:.LCPI5_2]
+; SDAG-NEXT:    adrp x8, .LCPI5_3
+; SDAG-NEXT:    sub v4.8h, v0.8h, v1.8h
+; SDAG-NEXT:    umull2 v5.4s, v4.8h, v2.8h
+; SDAG-NEXT:    umull v2.4s, v4.4h, v2.4h
+; SDAG-NEXT:    ldr q4, [x8, :lo12:.LCPI5_3]
+; SDAG-NEXT:    adrp x8, .LCPI5_4
+; SDAG-NEXT:    uzp2 v2.8h, v2.8h, v5.8h
+; SDAG-NEXT:    ldr q5, [x8, :lo12:.LCPI5_4]
+; SDAG-NEXT:    add v1.8h, v2.8h, v1.8h
+; SDAG-NEXT:    ushl v1.8h, v1.8h, v3.8h
+; SDAG-NEXT:    and v1.16b, v1.16b, v4.16b
+; SDAG-NEXT:    and v0.16b, v0.16b, v5.16b
+; SDAG-NEXT:    orr v0.16b, v0.16b, v1.16b
+; SDAG-NEXT:    ret
+;
+; GISEL-LABEL: pr38477:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    adrp x8, .LCPI5_4
+; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI5_4]
+; GISEL-NEXT:    adrp x8, .LCPI5_3
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI5_3]
+; GISEL-NEXT:    adrp x8, .LCPI5_2
+; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI5_2]
+; GISEL-NEXT:    adrp x8, .LCPI5_1
+; GISEL-NEXT:    ldr q4, [x8, :lo12:.LCPI5_1]
+; GISEL-NEXT:    adrp x8, .LCPI5_0
+; GISEL-NEXT:    ldr q5, [x8, :lo12:.LCPI5_0]
+; GISEL-NEXT:    umull2 v6.4s, v0.8h, v2.8h
+; GISEL-NEXT:    umull v2.4s, v0.4h, v2.4h
+; GISEL-NEXT:    uzp2 v2.8h, v2.8h, v6.8h
+; GISEL-NEXT:    cmeq v1.8h, v1.8h, v5.8h
+; GISEL-NEXT:    sub v5.8h, v0.8h, v2.8h
+; GISEL-NEXT:    umull2 v6.4s, v5.8h, v3.8h
+; GISEL-NEXT:    umull v3.4s, v5.4h, v3.4h
+; GISEL-NEXT:    uzp2 v3.8h, v3.8h, v6.8h
+; GISEL-NEXT:    neg v4.8h, v4.8h
+; GISEL-NEXT:    shl v1.8h, v1.8h, #15
+; GISEL-NEXT:    add v2.8h, v3.8h, v2.8h
+; GISEL-NEXT:    ushl v2.8h, v2.8h, v4.8h
+; GISEL-NEXT:    sshr v1.8h, v1.8h, #15
+; GISEL-NEXT:    bif v0.16b, v2.16b, v1.16b
+; GISEL-NEXT:    ret
+  %1 = udiv <8 x i16> %a0, <i16 1, i16 119, i16 73, i16 -111, i16 -3, i16 118, i16 32, i16 31>
+  ret <8 x i16> %1
+}

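A note on the vector NPQ path in buildUDivUsingMul (illustration only,
not part of the patch): lanes that need the fixup multiply by
1 << (EltBits - 1) instead of shifting, since umulh(x, 0x8000) == x >> 1
for 16-bit elements, while a factor of 0 zeroes the fixup term so the
following G_ADD leaves non-NPQ lanes unchanged. A standalone C++ check
of both identities:

    #include <cassert>
    #include <cstdint>

    static uint16_t UMulH16(uint16_t A, uint16_t B) {
      return uint16_t((uint32_t(A) * uint32_t(B)) >> 16); // high half of A*B
    }

    int main() {
      for (uint32_t X = 0; X <= 0xFFFF; ++X) {
        assert(UMulH16(uint16_t(X), 0x8000) == (X >> 1)); // NPQ lanes
        assert(UMulH16(uint16_t(X), 0) == 0);             // non-NPQ lanes
      }
      return 0;
    }
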
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.mir
new file mode 100644
index 0000000000000..6ebaff1dfaaed
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.mir
@@ -0,0 +1,353 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
+---
+name: udiv_by_scalar_const
+body:             |
+  bb.1:
+  liveins: $w0
+    ; CHECK-LABEL: name: udiv_by_scalar_const
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 818089009
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[C1]]
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UMULH]], [[C2]](s32)
+    ; CHECK-NEXT: $w0 = COPY [[LSHR1]](s32)
+    %0:_(s32) = COPY $w0
+    %cst:_(s32) = G_CONSTANT i32 42
+    %2:_(s32) = G_UDIV %0(s32), %cst(s32)
+    $w0 = COPY %2(s32)
+...
+---
+name:            combine_vec_udiv_uniform
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$q0' }
+body:             |
+  bb.1:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: combine_vec_udiv_uniform
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 25645
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16)
+    ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[COPY]], [[BUILD_VECTOR]]
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[COPY]], [[UMULH]]
+    ; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[SUB]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[UMULH1]], [[UMULH]]
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR2]](<8 x s16>)
+    ; CHECK-NEXT: $q0 = COPY [[LSHR]](<8 x s16>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(<8 x s16>) = COPY $q0
+    %2:_(s16) = G_CONSTANT i16 23
+    %1:_(<8 x s16>) = G_BUILD_VECTOR %2(s16), %2(s16), %2(s16), %2(s16), %2(s16), %2(s16), %2(s16), %2(s16)
+    %3:_(<8 x s16>) = G_UDIV %0, %1
+    $q0 = COPY %3(<8 x s16>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            combine_vec_udiv_nonuniform
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$q0' }
+body:             |
+  bb.1:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: combine_vec_udiv_nonuniform
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 23
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 34
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -23
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 56
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 128
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 -256
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16), [[C4]](s16), [[C5]](s16), [[C6]](s16), [[C7]](s16)
+    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 25645
+    ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+    ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 -3855
+    ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
+    ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 8195
+    ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s16) = G_CONSTANT i16 13
+    ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 3
+    ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 9363
+    ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s16) = G_CONSTANT i16 512
+    ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32767
+    ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+    ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32639
+    ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C15]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C9]](s16), [[C11]](s16), [[C13]](s16), [[C16]](s16), [[C17]](s16), [[C18]](s16), [[C20]](s16), [[C21]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C7]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C10]](s16), [[C12]](s16), [[C14]](s16), [[C8]](s16), [[C8]](s16), [[C19]](s16), [[C19]](s16), [[C8]](s16)
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[COPY]], [[BUILD_VECTOR1]](<8 x s16>)
+    ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[LSHR]], [[BUILD_VECTOR2]]
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[COPY]], [[UMULH]]
+    ; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[SUB]], [[BUILD_VECTOR3]]
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[UMULH1]], [[UMULH]]
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR4]](<8 x s16>)
+    ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+    ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR5]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<8 x s16>) = G_SELECT [[ICMP]](<8 x s1>), [[COPY]], [[LSHR1]]
+    ; CHECK-NEXT: $q0 = COPY [[SELECT]](<8 x s16>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(<8 x s16>) = COPY $q0
+    %2:_(s16) = G_CONSTANT i16 23
+    %3:_(s16) = G_CONSTANT i16 34
+    %4:_(s16) = G_CONSTANT i16 -23
+    %5:_(s16) = G_CONSTANT i16 56
+    %6:_(s16) = G_CONSTANT i16 128
+    %7:_(s16) = G_CONSTANT i16 -1
+    %8:_(s16) = G_CONSTANT i16 -256
+    %9:_(s16) = G_CONSTANT i16 -32768
+    %1:_(<8 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16), %5(s16), %6(s16), %7(s16), %8(s16), %9(s16)
+    %10:_(<8 x s16>) = G_UDIV %0, %1
+    $q0 = COPY %10(<8 x s16>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            combine_vec_udiv_nonuniform2
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$q0' }
+body:             |
+  bb.1:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: combine_vec_udiv_nonuniform2
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -34
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 35
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 36
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -37
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 38
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -39
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 40
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 -41
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16), [[C4]](s16), [[C5]](s16), [[C6]](s16), [[C7]](s16)
+    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 16393
+    ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 13
+    ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 -5617
+    ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
+    ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s16) = G_CONSTANT i16 -7281
+    ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32749
+    ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+    ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s16) = G_CONSTANT i16 -10347
+    ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s16) = G_CONSTANT i16 8197
+    ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s16) = G_CONSTANT i16 -13107
+    ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32747
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C10]](s16), [[C10]](s16), [[C10]](s16), [[C10]](s16), [[C10]](s16), [[C10]](s16), [[C10]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C9]](s16), [[C12]](s16), [[C14]](s16), [[C15]](s16), [[C17]](s16), [[C18]](s16), [[C19]](s16), [[C20]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C11]](s16), [[C13]](s16), [[C13]](s16), [[C16]](s16), [[C13]](s16), [[C11]](s16), [[C13]](s16), [[C16]](s16)
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[COPY]], [[BUILD_VECTOR1]](<8 x s16>)
+    ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[LSHR]], [[BUILD_VECTOR2]]
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[UMULH]], [[BUILD_VECTOR3]](<8 x s16>)
+    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR4]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<8 x s16>) = G_SELECT [[ICMP]](<8 x s1>), [[COPY]], [[LSHR1]]
+    ; CHECK-NEXT: $q0 = COPY [[SELECT]](<8 x s16>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(<8 x s16>) = COPY $q0
+    %2:_(s16) = G_CONSTANT i16 -34
+    %3:_(s16) = G_CONSTANT i16 35
+    %4:_(s16) = G_CONSTANT i16 36
+    %5:_(s16) = G_CONSTANT i16 -37
+    %6:_(s16) = G_CONSTANT i16 38
+    %7:_(s16) = G_CONSTANT i16 -39
+    %8:_(s16) = G_CONSTANT i16 40
+    %9:_(s16) = G_CONSTANT i16 -41
+    %1:_(<8 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16), %5(s16), %6(s16), %7(s16), %8(s16), %9(s16)
+    %10:_(<8 x s16>) = G_UDIV %0, %1
+    $q0 = COPY %10(<8 x s16>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            combine_vec_udiv_nonuniform3
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$q0' }
+body:             |
+  bb.1:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: combine_vec_udiv_nonuniform3
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 7
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 23
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 25
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 27
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 31
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 47
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 63
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 127
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16), [[C4]](s16), [[C5]](s16), [[C6]](s16), [[C7]](s16)
+    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 9363
+    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
+    ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+    ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 25645
+    ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+    ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 18351
+    ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s16) = G_CONSTANT i16 12137
+    ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 2115
+    ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 23705
+    ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
+    ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s16) = G_CONSTANT i16 1041
+    ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s16) = G_CONSTANT i16 517
+    ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C11]](s16), [[C13]](s16), [[C14]](s16), [[C15]](s16), [[C16]](s16), [[C18]](s16), [[C19]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C10]](s16), [[C12]](s16), [[C12]](s16), [[C12]](s16), [[C12]](s16), [[C17]](s16), [[C17]](s16), [[C20]](s16)
+    ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[COPY]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[COPY]], [[UMULH]]
+    ; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[SUB]], [[BUILD_VECTOR2]]
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[UMULH1]], [[UMULH]]
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR3]](<8 x s16>)
+    ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR4]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<8 x s16>) = G_SELECT [[ICMP]](<8 x s1>), [[COPY]], [[LSHR]]
+    ; CHECK-NEXT: $q0 = COPY [[SELECT]](<8 x s16>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(<8 x s16>) = COPY $q0
+    %2:_(s16) = G_CONSTANT i16 7
+    %3:_(s16) = G_CONSTANT i16 23
+    %4:_(s16) = G_CONSTANT i16 25
+    %5:_(s16) = G_CONSTANT i16 27
+    %6:_(s16) = G_CONSTANT i16 31
+    %7:_(s16) = G_CONSTANT i16 47
+    %8:_(s16) = G_CONSTANT i16 63
+    %9:_(s16) = G_CONSTANT i16 127
+    %1:_(<8 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16), %5(s16), %6(s16), %7(s16), %8(s16), %9(s16)
+    %10:_(<8 x s16>) = G_UDIV %0, %1
+    $q0 = COPY %10(<8 x s16>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            combine_vec_udiv_nonuniform4
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$q0' }
+body:             |
+  bb.1:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: combine_vec_udiv_nonuniform4
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 -64
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 -85
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s8) = G_CONSTANT i8 7
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C3]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C4]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8)
+    ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<16 x s8>) = G_UMULH [[COPY]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<16 x s8>) = G_LSHR [[UMULH]], [[BUILD_VECTOR2]](<16 x s8>)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<16 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<16 x s8>), [[BUILD_VECTOR3]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<16 x s8>) = G_SELECT [[ICMP]](<16 x s1>), [[COPY]], [[LSHR]]
+    ; CHECK-NEXT: $q0 = COPY [[SELECT]](<16 x s8>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(<16 x s8>) = COPY $q0
+    %2:_(s8) = G_CONSTANT i8 -64
+    %3:_(s8) = G_CONSTANT i8 1
+    %1:_(<16 x s8>) = G_BUILD_VECTOR %2(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8)
+    %4:_(<16 x s8>) = G_UDIV %0, %1
+    $q0 = COPY %4(<16 x s8>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            pr38477
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$q0' }
+body:             |
+  bb.1:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: pr38477
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 119
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 73
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -111
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 -3
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 118
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 32
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 31
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16), [[C4]](s16), [[C5]](s16), [[C6]](s16), [[C7]](s16)
+    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 4957
+    ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
+    ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
+    ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 -8079
+    ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 4103
+    ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s16) = G_CONSTANT i16 12
+    ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 16385
+    ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
+    ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s16) = G_CONSTANT i16 -29991
+    ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s16) = G_CONSTANT i16 2048
+    ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s16) = G_CONSTANT i16 2115
+    ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C9]](s16), [[C12]](s16), [[C13]](s16), [[C15]](s16), [[C17]](s16), [[C18]](s16), [[C19]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C10]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C10]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C11]](s16), [[C11]](s16), [[C14]](s16), [[C16]](s16), [[C11]](s16), [[C8]](s16), [[C20]](s16)
+    ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[COPY]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[COPY]], [[UMULH]]
+    ; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[SUB]], [[BUILD_VECTOR2]]
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[UMULH1]], [[UMULH]]
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR3]](<8 x s16>)
+    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR4]]
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<8 x s16>) = G_SELECT [[ICMP]](<8 x s1>), [[COPY]], [[LSHR]]
+    ; CHECK-NEXT: $q0 = COPY [[SELECT]](<8 x s16>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(<8 x s16>) = COPY $q0
+    %2:_(s16) = G_CONSTANT i16 1
+    %3:_(s16) = G_CONSTANT i16 119
+    %4:_(s16) = G_CONSTANT i16 73
+    %5:_(s16) = G_CONSTANT i16 -111
+    %6:_(s16) = G_CONSTANT i16 -3
+    %7:_(s16) = G_CONSTANT i16 118
+    %8:_(s16) = G_CONSTANT i16 32
+    %9:_(s16) = G_CONSTANT i16 31
+    %1:_(<8 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16), %5(s16), %6(s16), %7(s16), %8(s16), %9(s16)
+    %10:_(<8 x s16>) = G_UDIV %0, %1
+    $q0 = COPY %10(<8 x s16>)
+    RET_ReallyLR implicit $q0
+
+...

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
index bd1bc4d0a25c9..1a3f54695bc49 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
@@ -222,117 +222,21 @@ define i32 @v_udiv_i32_pow2k_denom(i32 %num) {
 ; CHECK-LABEL: v_udiv_i32_pow2k_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_movk_i32 s6, 0x1000
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, 0x45800000
-; CHECK-NEXT:    v_mov_b32_e32 v2, 0xfffff000
-; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v1
-; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
-; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 12, v1
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
-; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; CHECK-NEXT:    v_subrev_i32_e64 v2, s[4:5], s6, v0
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v1
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0x100000
+; CHECK-NEXT:    v_mul_hi_u32 v0, v0, v1
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = udiv i32 %num, 4096
   ret i32 %result
 }
 
 define <2 x i32> @v_udiv_v2i32_pow2k_denom(<2 x i32> %num) {
-; GISEL-LABEL: v_udiv_v2i32_pow2k_denom:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_movk_i32 s4, 0x1000
-; GISEL-NEXT:    v_mov_b32_e32 v2, 0x1000
-; GISEL-NEXT:    v_mov_b32_e32 v3, 0xfffff000
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s4
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v2
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT:    v_mul_lo_u32 v6, v3, v4
-; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v5
-; GISEL-NEXT:    v_mul_hi_u32 v6, v4, v6
-; GISEL-NEXT:    v_mul_hi_u32 v3, v5, v3
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
-; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
-; GISEL-NEXT:    v_mul_hi_u32 v3, v1, v3
-; GISEL-NEXT:    v_lshlrev_b32_e32 v5, 12, v4
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
-; GISEL-NEXT:    v_lshlrev_b32_e32 v7, 12, v3
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v3
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
-; GISEL-NEXT:    v_subrev_i32_e64 v5, s[4:5], s4, v0
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, v8, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v6, s[6:7], v1, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v3
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_udiv_v2i32_pow2k_denom:
-; CGP:       ; %bb.0:
-; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    s_movk_i32 s8, 0x1000
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, 0x45800000
-; CGP-NEXT:    s_movk_i32 s4, 0xf000
-; CGP-NEXT:    v_mov_b32_e32 v3, 0xfffff000
-; CGP-NEXT:    v_mov_b32_e32 v4, 0x1000
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, 0x45800000
-; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
-; CGP-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
-; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; CGP-NEXT:    v_mul_lo_u32 v6, s4, v2
-; CGP-NEXT:    v_mul_lo_u32 v3, v3, v5
-; CGP-NEXT:    v_mul_hi_u32 v6, v2, v6
-; CGP-NEXT:    v_mul_hi_u32 v3, v5, v3
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
-; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
-; CGP-NEXT:    v_mul_hi_u32 v3, v1, v3
-; CGP-NEXT:    v_lshlrev_b32_e32 v5, 12, v2
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v2
-; CGP-NEXT:    v_lshlrev_b32_e32 v7, 12, v3
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v3
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
-; CGP-NEXT:    v_subrev_i32_e64 v5, s[4:5], s8, v0
-; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s8, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v3, v3, v8, s[4:5]
-; CGP-NEXT:    v_sub_i32_e64 v6, s[6:7], v1, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s[4:5]
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v3
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v2, v5, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
-; CGP-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-LABEL: v_udiv_v2i32_pow2k_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 s4, 0x100000
+; CHECK-NEXT:    v_mul_hi_u32 v0, v0, s4
+; CHECK-NEXT:    v_mul_hi_u32 v1, v1, s4
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = udiv <2 x i32> %num, <i32 4096, i32 4096>
   ret <2 x i32> %result
 }
@@ -341,25 +245,12 @@ define i32 @v_udiv_i32_oddk_denom(i32 %num) {
 ; CHECK-LABEL: v_udiv_i32_oddk_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, 0x4996c7d8
-; CHECK-NEXT:    v_mov_b32_e32 v2, 0xffed2705
-; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v1
-; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0xb2a50881
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
-; CHECK-NEXT:    v_mul_lo_u32 v2, v1, s6
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
-; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; CHECK-NEXT:    v_subrev_i32_e64 v2, s[4:5], s6, v0
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v1
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT:    v_lshrrev_b32_e32 v0, 20, v0
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = udiv i32 %num, 1235195
   ret i32 %result
@@ -369,87 +260,34 @@ define <2 x i32> @v_udiv_v2i32_oddk_denom(<2 x i32> %num) {
 ; GISEL-LABEL: v_udiv_v2i32_oddk_denom:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s8, 0x12d8fb
-; GISEL-NEXT:    v_mov_b32_e32 v2, 0x12d8fb
-; GISEL-NEXT:    v_mov_b32_e32 v3, 0xffed2705
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s8
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v2
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT:    v_mul_lo_u32 v6, v3, v4
-; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v5
-; GISEL-NEXT:    v_mul_hi_u32 v6, v4, v6
-; GISEL-NEXT:    v_mul_hi_u32 v3, v5, v3
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
-; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
-; GISEL-NEXT:    v_mul_hi_u32 v3, v1, v3
-; GISEL-NEXT:    v_mul_lo_u32 v5, v4, s8
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
-; GISEL-NEXT:    v_mul_lo_u32 v7, v3, v2
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v3
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
-; GISEL-NEXT:    v_subrev_i32_e64 v5, s[4:5], s8, v0
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, v8, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v6, s[6:7], v1, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v3
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
+; GISEL-NEXT:    s_mov_b32 s4, 0xb2a50881
+; GISEL-NEXT:    s_brev_b32 s5, 1
+; GISEL-NEXT:    v_mul_hi_u32 v2, v0, s4
+; GISEL-NEXT:    v_mul_hi_u32 v3, v1, s4
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
+; GISEL-NEXT:    v_mul_hi_u32 v0, v0, s5
+; GISEL-NEXT:    v_mul_hi_u32 v1, v1, s5
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
+; GISEL-NEXT:    v_lshrrev_b32_e32 v0, 20, v0
+; GISEL-NEXT:    v_lshrrev_b32_e32 v1, 20, v1
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_udiv_v2i32_oddk_denom:
 ; CGP:       ; %bb.0:
 ; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    s_mov_b32 s8, 0x12d8fb
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, 0x4996c7d8
-; CGP-NEXT:    s_mov_b32 s4, 0xffed2705
-; CGP-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, 0x4996c7d8
-; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
-; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
-; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; CGP-NEXT:    v_mul_lo_u32 v5, s4, v2
-; CGP-NEXT:    v_mul_lo_u32 v6, s4, v4
-; CGP-NEXT:    v_mul_hi_u32 v5, v2, v5
-; CGP-NEXT:    v_mul_hi_u32 v6, v4, v6
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
-; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
-; CGP-NEXT:    v_mul_lo_u32 v5, v2, s8
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v2
-; CGP-NEXT:    v_mul_lo_u32 v7, v4, s8
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
-; CGP-NEXT:    v_subrev_i32_e64 v5, s[4:5], s8, v0
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v8, s[4:5]
-; CGP-NEXT:    v_sub_i32_e64 v6, s[6:7], v1, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s[4:5]
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s8, v0
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v2, v5, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v6, vcc
+; CGP-NEXT:    s_mov_b32 s4, 0xb2a50881
+; CGP-NEXT:    v_mul_hi_u32 v2, v0, s4
+; CGP-NEXT:    v_mul_hi_u32 v3, v1, s4
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
+; CGP-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
+; CGP-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
+; CGP-NEXT:    v_lshrrev_b32_e32 v0, 20, v0
+; CGP-NEXT:    v_lshrrev_b32_e32 v1, 20, v1
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = udiv <2 x i32> %num, <i32 1235195, i32 1235195>
   ret <2 x i32> %result

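(Aside for readers comparing the check lines above: per 32-bit lane, the new
GISEL and CGP sequences compute the same magic-number expansion. A minimal C
sketch follows, with the constant 0xb2a50881 and the 1- and 20-bit shifts read
off the check lines; the helper name is invented, and this assumes the standard
"Hacker's Delight" scheme where the magic for 1235195 overflows 32 bits,
forcing the subtract/halve/add fixup. GISEL spells the halving as a
multiply-high by 0x80000000, CGP as a plain shift.)

#include <stdint.h>

/* q = x udiv 1235195, per lane of the <2 x i32> test above. */
static uint32_t udiv_1235195(uint32_t x) {
  uint32_t t = (uint32_t)(((uint64_t)x * 0xb2a50881u) >> 32); /* mulhu(x, magic) */
  return (((x - t) >> 1) + t) >> 20; /* overflow fixup, then final shift */
}
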
diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
index 2c95c717e34b6..dcf7cac58b982 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -969,659 +969,78 @@ define i64 @v_udiv_i64_pow2k_denom(i64 %num) {
 ; CHECK-LABEL: v_udiv_i64_pow2k_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x1000
-; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v3, 0
-; CHECK-NEXT:    s_movk_i32 s6, 0xf000
-; CHECK-NEXT:    s_movk_i32 s7, 0x1000
-; CHECK-NEXT:    s_bfe_i32 s4, -1, 0x10000
-; CHECK-NEXT:    s_bfe_i32 s5, -1, 0x10000
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
-; CHECK-NEXT:    v_mov_b32_e32 v3, s4
-; CHECK-NEXT:    v_mov_b32_e32 v4, s5
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
-; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
-; CHECK-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v2
-; CHECK-NEXT:    v_trunc_f32_e32 v5, v5
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v5
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT:    v_mul_lo_u32 v6, s6, v5
-; CHECK-NEXT:    v_mul_lo_u32 v7, s6, v2
-; CHECK-NEXT:    v_mul_lo_u32 v8, -1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v9, s6, v2
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v7
-; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v7
-; CHECK-NEXT:    v_mul_hi_u32 v7, v5, v7
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
-; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v6
-; CHECK-NEXT:    v_mul_lo_u32 v11, v5, v6
-; CHECK-NEXT:    v_mul_hi_u32 v12, v2, v6
-; CHECK-NEXT:    v_mul_hi_u32 v6, v5, v6
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v11, v10
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; CHECK-NEXT:    v_addc_u32_e64 v7, s[4:5], v5, v6, vcc
-; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
-; CHECK-NEXT:    v_mul_lo_u32 v6, s6, v2
-; CHECK-NEXT:    v_mul_lo_u32 v8, -1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v9, s6, v2
-; CHECK-NEXT:    v_mul_lo_u32 v10, s6, v7
-; CHECK-NEXT:    v_mul_lo_u32 v11, v7, v6
-; CHECK-NEXT:    v_mul_hi_u32 v12, v2, v6
-; CHECK-NEXT:    v_mul_hi_u32 v6, v7, v6
-; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
-; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
-; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v8
-; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v8
-; CHECK-NEXT:    v_mul_hi_u32 v13, v2, v8
-; CHECK-NEXT:    v_mul_hi_u32 v7, v7, v8
-; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
-; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v10, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
-; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v12
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
-; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v13
-; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
-; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v11
-; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
-; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
-; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
-; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v5, v7, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
-; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v6, v1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v2
-; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
-; CHECK-NEXT:    v_mul_lo_u32 v8, v0, v5
-; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v5
-; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v5
-; CHECK-NEXT:    v_mul_hi_u32 v5, v1, v5
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v9, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; CHECK-NEXT:    s_mov_b32 s4, 0x100000
+; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v0
+; CHECK-NEXT:    v_mul_hi_u32 v3, v0, 0
+; CHECK-NEXT:    v_lshlrev_b32_e32 v4, 20, v1
+; CHECK-NEXT:    v_mul_hi_u32 v5, v1, 0
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0, v2
 ; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; CHECK-NEXT:    v_mul_lo_u32 v7, s7, v2
-; CHECK-NEXT:    v_mul_lo_u32 v8, 0, v2
-; CHECK-NEXT:    v_mul_hi_u32 v9, s7, v2
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v5
-; CHECK-NEXT:    v_add_i32_e32 v10, vcc, 1, v2
-; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, 0, v5, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v10
-; CHECK-NEXT:    v_addc_u32_e32 v12, vcc, 0, v11, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
-; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
-; CHECK-NEXT:    v_subb_u32_e64 v7, s[4:5], v1, v6, vcc
-; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v6
-; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, v3, v6, s[4:5]
-; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s7, v0
-; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s7, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
-; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
-; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v10, v8, vcc
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v11, v12, vcc
-; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v0, v0, s4
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_mul_hi_u32 v1, v1, s4
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = udiv i64 %num, 4096
   ret i64 %result
 }
 
 define <2 x i64> @v_udiv_v2i64_pow2k_denom(<2 x i64> %num) {
-; GISEL-LABEL: v_udiv_v2i64_pow2k_denom:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_movk_i32 s12, 0x1000
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s12
-; GISEL-NEXT:    s_sub_u32 s8, 0, s12
-; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
-; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
-; GISEL-NEXT:    v_mov_b32_e32 v6, v4
-; GISEL-NEXT:    s_and_b32 s4, s4, 1
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
-; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v6
-; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
-; GISEL-NEXT:    s_subb_u32 s9, 0, 0
-; GISEL-NEXT:    s_bfe_i32 s10, -1, 0x10000
-; GISEL-NEXT:    s_bfe_i32 s11, -1, 0x10000
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; GISEL-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
-; GISEL-NEXT:    s_sub_u32 s13, 0, s12
-; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
-; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
-; GISEL-NEXT:    v_trunc_f32_e32 v6, v6
-; GISEL-NEXT:    s_and_b32 s4, s4, 1
-; GISEL-NEXT:    v_trunc_f32_e32 v7, v7
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
-; GISEL-NEXT:    s_subb_u32 s6, 0, 0
-; GISEL-NEXT:    v_mul_lo_u32 v8, s13, v6
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v7
-; GISEL-NEXT:    v_mul_lo_u32 v10, s13, v4
-; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
-; GISEL-NEXT:    v_mul_hi_u32 v12, s13, v4
-; GISEL-NEXT:    v_mul_lo_u32 v13, s8, v5
-; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
-; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
-; GISEL-NEXT:    v_mul_lo_u32 v11, v6, v10
-; GISEL-NEXT:    v_mul_hi_u32 v16, v4, v10
-; GISEL-NEXT:    v_mul_hi_u32 v10, v6, v10
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
-; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v13
-; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v13
-; GISEL-NEXT:    v_mul_hi_u32 v13, v7, v13
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
-; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
-; GISEL-NEXT:    v_mul_lo_u32 v15, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v6, v8
-; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
-; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v9
-; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v9
-; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
-; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
-; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v11, s13, v4
-; GISEL-NEXT:    v_mul_lo_u32 v12, s6, v4
-; GISEL-NEXT:    v_mul_hi_u32 v14, s13, v4
-; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
-; GISEL-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v15, s8, v5
-; GISEL-NEXT:    v_mul_lo_u32 v16, s9, v5
-; GISEL-NEXT:    v_mul_hi_u32 v17, s8, v5
-; GISEL-NEXT:    v_mul_lo_u32 v18, s8, v13
-; GISEL-NEXT:    v_mul_lo_u32 v19, v13, v15
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v18
-; GISEL-NEXT:    v_mul_hi_u32 v18, v5, v15
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v17
-; GISEL-NEXT:    v_mul_lo_u32 v17, v5, v16
-; GISEL-NEXT:    v_add_i32_e64 v17, s[6:7], v19, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v17, s[6:7], v17, v18
-; GISEL-NEXT:    v_mul_lo_u32 v17, s13, v10
-; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v11
-; GISEL-NEXT:    v_add_i32_e64 v12, s[8:9], v12, v17
-; GISEL-NEXT:    v_mul_hi_u32 v17, v4, v11
-; GISEL-NEXT:    v_add_i32_e64 v12, s[8:9], v12, v14
-; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v12
-; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v18, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v17
-; GISEL-NEXT:    v_mov_b32_e32 v14, s10
-; GISEL-NEXT:    v_mov_b32_e32 v17, s11
-; GISEL-NEXT:    s_bfe_i32 s13, -1, 0x10000
-; GISEL-NEXT:    s_bfe_i32 s14, -1, 0x10000
-; GISEL-NEXT:    v_add_i32_e64 v6, s[10:11], v6, v8
-; GISEL-NEXT:    v_mov_b32_e32 v8, s13
-; GISEL-NEXT:    v_add_i32_e64 v7, s[10:11], v7, v9
-; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v11
-; GISEL-NEXT:    v_mul_hi_u32 v11, v13, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v15, s[8:9], v18, v15
-; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v12
-; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v12
-; GISEL-NEXT:    v_mul_hi_u32 v12, v4, v12
-; GISEL-NEXT:    v_add_i32_e64 v9, s[8:9], v18, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v9, s[8:9], v9, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v12, s[8:9], v18, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v18, s[6:7], v19, v18
-; GISEL-NEXT:    v_mul_lo_u32 v19, v13, v16
-; GISEL-NEXT:    v_mul_hi_u32 v13, v13, v16
-; GISEL-NEXT:    v_mul_hi_u32 v16, v5, v16
-; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v19, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v19, v16
-; GISEL-NEXT:    v_mov_b32_e32 v19, s14
-; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v12, v15
-; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v18
-; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v13, v15
-; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
-; GISEL-NEXT:    v_addc_u32_e64 v7, vcc, v7, v12, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
-; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v4
-; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
-; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v5
-; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
-; GISEL-NEXT:    v_mul_lo_u32 v13, v2, v6
-; GISEL-NEXT:    v_mul_lo_u32 v15, v3, v6
-; GISEL-NEXT:    v_mul_hi_u32 v16, v2, v6
-; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
-; GISEL-NEXT:    v_mul_lo_u32 v18, v0, v7
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v7
-; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v7
-; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
-; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v15, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v11, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v15, v10
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v18, v16
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
-; GISEL-NEXT:    v_mul_lo_u32 v10, s12, v4
-; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v4
-; GISEL-NEXT:    v_mul_hi_u32 v15, s12, v4
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; GISEL-NEXT:    v_mul_lo_u32 v12, s12, v5
-; GISEL-NEXT:    v_mul_lo_u32 v16, 0, v5
-; GISEL-NEXT:    v_mul_hi_u32 v18, s12, v5
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
-; GISEL-NEXT:    v_mul_lo_u32 v9, s12, v6
-; GISEL-NEXT:    v_mul_lo_u32 v11, s12, v7
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v4
-; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v6, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, 1, v5
-; GISEL-NEXT:    v_addc_u32_e32 v18, vcc, 0, v7, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
-; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], v3, v9, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v9
-; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s12, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v12
-; GISEL-NEXT:    v_subb_u32_e64 v12, s[6:7], v1, v11, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v11
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v10
-; GISEL-NEXT:    v_add_i32_e64 v10, s[8:9], 1, v13
-; GISEL-NEXT:    v_addc_u32_e64 v11, s[8:9], 0, v16, s[8:9]
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s[6:7]
-; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], s12, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[6:7]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, v14, v9, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], 1, v15
-; GISEL-NEXT:    v_addc_u32_e64 v14, s[6:7], 0, v18, s[6:7]
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
-; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, s12, v2
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s12, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
-; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, s12, v0
-; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s12, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v19, v2, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v17, v0, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v13, v10, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, v15, v12, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v16, v11, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v18, v14, s[4:5]
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_udiv_v2i64_pow2k_denom:
-; CGP:       ; %bb.0:
-; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
-; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
-; CGP-NEXT:    s_movk_i32 s8, 0xf000
-; CGP-NEXT:    s_movk_i32 s12, 0x1000
-; CGP-NEXT:    s_bfe_i32 s10, -1, 0x10000
-; CGP-NEXT:    s_bfe_i32 s11, -1, 0x10000
-; CGP-NEXT:    s_bfe_i32 s13, -1, 0x10000
-; CGP-NEXT:    s_bfe_i32 s14, -1, 0x10000
-; CGP-NEXT:    v_mov_b32_e32 v6, v4
-; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
-; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v6
-; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; CGP-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
-; CGP-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
-; CGP-NEXT:    v_trunc_f32_e32 v6, v6
-; CGP-NEXT:    v_trunc_f32_e32 v7, v7
-; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
-; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; CGP-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
-; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; CGP-NEXT:    v_mul_lo_u32 v8, s8, v6
-; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; CGP-NEXT:    v_mul_lo_u32 v9, s8, v7
-; CGP-NEXT:    v_mul_lo_u32 v10, s8, v4
-; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
-; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
-; CGP-NEXT:    v_mul_lo_u32 v13, s8, v5
-; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
-; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
-; CGP-NEXT:    v_mul_lo_u32 v11, v6, v10
-; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
-; CGP-NEXT:    v_mul_hi_u32 v10, v6, v10
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
-; CGP-NEXT:    v_mul_lo_u32 v14, v7, v13
-; CGP-NEXT:    v_mul_hi_u32 v17, v5, v13
-; CGP-NEXT:    v_mul_hi_u32 v13, v7, v13
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
-; CGP-NEXT:    v_mul_lo_u32 v12, v4, v8
-; CGP-NEXT:    v_mul_lo_u32 v15, v6, v8
-; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
-; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
-; CGP-NEXT:    v_mul_lo_u32 v19, v5, v9
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
-; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
-; CGP-NEXT:    v_mul_lo_u32 v14, v7, v9
-; CGP-NEXT:    v_mul_hi_u32 v17, v5, v9
-; CGP-NEXT:    v_mul_hi_u32 v9, v7, v9
-; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
-; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
-; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
-; CGP-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
-; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
-; CGP-NEXT:    v_mul_lo_u32 v11, s8, v4
-; CGP-NEXT:    v_mul_lo_u32 v12, -1, v4
-; CGP-NEXT:    v_mul_hi_u32 v14, s8, v4
-; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
-; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
-; CGP-NEXT:    v_mul_lo_u32 v15, s8, v5
-; CGP-NEXT:    v_mul_lo_u32 v16, -1, v5
-; CGP-NEXT:    v_mul_hi_u32 v17, s8, v5
-; CGP-NEXT:    v_mul_lo_u32 v18, s8, v13
-; CGP-NEXT:    v_mul_lo_u32 v19, v13, v15
-; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v18
-; CGP-NEXT:    v_mul_hi_u32 v18, v5, v15
-; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v17
-; CGP-NEXT:    v_mul_lo_u32 v17, v5, v16
-; CGP-NEXT:    v_add_i32_e64 v17, s[6:7], v19, v17
-; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
-; CGP-NEXT:    v_add_i32_e64 v17, s[6:7], v17, v18
-; CGP-NEXT:    v_mul_lo_u32 v17, s8, v10
-; CGP-NEXT:    v_mul_lo_u32 v18, v10, v11
-; CGP-NEXT:    v_add_i32_e64 v12, s[8:9], v12, v17
-; CGP-NEXT:    v_mul_hi_u32 v17, v4, v11
-; CGP-NEXT:    v_add_i32_e64 v12, s[8:9], v12, v14
-; CGP-NEXT:    v_mul_lo_u32 v14, v4, v12
-; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v18, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
-; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v17
-; CGP-NEXT:    v_mov_b32_e32 v14, s10
-; CGP-NEXT:    v_mov_b32_e32 v17, s11
-; CGP-NEXT:    v_add_i32_e64 v6, s[10:11], v6, v8
-; CGP-NEXT:    v_mov_b32_e32 v8, s13
-; CGP-NEXT:    v_add_i32_e64 v7, s[10:11], v7, v9
-; CGP-NEXT:    v_mul_hi_u32 v9, v10, v11
-; CGP-NEXT:    v_mul_hi_u32 v11, v13, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
-; CGP-NEXT:    v_add_i32_e64 v15, s[8:9], v18, v15
-; CGP-NEXT:    v_mul_lo_u32 v18, v10, v12
-; CGP-NEXT:    v_mul_hi_u32 v10, v10, v12
-; CGP-NEXT:    v_mul_hi_u32 v12, v4, v12
-; CGP-NEXT:    v_add_i32_e64 v9, s[8:9], v18, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
-; CGP-NEXT:    v_add_i32_e64 v9, s[8:9], v9, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[8:9]
-; CGP-NEXT:    v_add_i32_e64 v12, s[8:9], v18, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; CGP-NEXT:    v_add_i32_e64 v18, s[6:7], v19, v18
-; CGP-NEXT:    v_mul_lo_u32 v19, v13, v16
-; CGP-NEXT:    v_mul_hi_u32 v13, v13, v16
-; CGP-NEXT:    v_mul_hi_u32 v16, v5, v16
-; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v19, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
-; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
-; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v19, v16
-; CGP-NEXT:    v_mov_b32_e32 v19, s14
-; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[6:7]
-; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v18
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v12, v15
-; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v18
-; CGP-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
-; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v13, v15
-; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
-; CGP-NEXT:    v_addc_u32_e64 v7, vcc, v7, v12, s[4:5]
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
-; CGP-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
-; CGP-NEXT:    v_mul_lo_u32 v9, v3, v4
-; CGP-NEXT:    v_mul_hi_u32 v10, v2, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
-; CGP-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
-; CGP-NEXT:    v_mul_lo_u32 v11, v1, v5
-; CGP-NEXT:    v_mul_hi_u32 v12, v0, v5
-; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
-; CGP-NEXT:    v_mul_lo_u32 v13, v2, v6
-; CGP-NEXT:    v_mul_lo_u32 v15, v3, v6
-; CGP-NEXT:    v_mul_hi_u32 v16, v2, v6
-; CGP-NEXT:    v_mul_hi_u32 v6, v3, v6
-; CGP-NEXT:    v_mul_lo_u32 v18, v0, v7
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT:    v_mul_lo_u32 v11, v1, v7
-; CGP-NEXT:    v_mul_hi_u32 v12, v0, v7
-; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
-; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v15, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v11, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
-; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v15, v10
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v18, v16
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT:    v_mul_lo_u32 v10, s12, v4
-; CGP-NEXT:    v_mul_lo_u32 v13, 0, v4
-; CGP-NEXT:    v_mul_hi_u32 v15, s12, v4
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT:    v_mul_lo_u32 v12, s12, v5
-; CGP-NEXT:    v_mul_lo_u32 v16, 0, v5
-; CGP-NEXT:    v_mul_hi_u32 v18, s12, v5
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
-; CGP-NEXT:    v_mul_lo_u32 v9, s12, v6
-; CGP-NEXT:    v_mul_lo_u32 v11, s12, v7
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, 1, v4
-; CGP-NEXT:    v_addc_u32_e32 v16, vcc, 0, v6, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
-; CGP-NEXT:    v_add_i32_e32 v15, vcc, 1, v5
-; CGP-NEXT:    v_addc_u32_e32 v18, vcc, 0, v7, vcc
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
-; CGP-NEXT:    v_subb_u32_e64 v10, s[4:5], v3, v9, vcc
-; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v9
-; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s12, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v12
-; CGP-NEXT:    v_subb_u32_e64 v12, s[6:7], v1, v11, s[4:5]
-; CGP-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v11
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v10
-; CGP-NEXT:    v_add_i32_e64 v10, s[8:9], 1, v13
-; CGP-NEXT:    v_addc_u32_e64 v11, s[8:9], 0, v16, s[8:9]
-; CGP-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s[6:7]
-; CGP-NEXT:    v_cmp_le_u32_e64 s[6:7], s12, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[6:7]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v9, v14, v9, s[6:7]
-; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], 1, v15
-; CGP-NEXT:    v_addc_u32_e64 v14, s[6:7], 0, v18, s[6:7]
-; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; CGP-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
-; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, s12, v2
-; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s12, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
-; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s12, v0
-; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s12, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v19, v2, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v17, v0, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v13, v10, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v0, v15, v12, s[4:5]
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v16, v11, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v1, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v1, v18, v14, s[4:5]
-; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
-; CGP-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
-; CGP-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-LABEL: v_udiv_v2i64_pow2k_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 s4, 0x100000
+; CHECK-NEXT:    v_lshlrev_b32_e32 v4, 20, v0
+; CHECK-NEXT:    v_mul_hi_u32 v5, v0, 0
+; CHECK-NEXT:    v_lshlrev_b32_e32 v6, 20, v1
+; CHECK-NEXT:    v_mul_hi_u32 v7, v1, 0
+; CHECK-NEXT:    v_lshlrev_b32_e32 v8, 20, v2
+; CHECK-NEXT:    v_mul_hi_u32 v9, v2, 0
+; CHECK-NEXT:    v_lshlrev_b32_e32 v10, 20, v3
+; CHECK-NEXT:    v_mul_hi_u32 v11, v3, 0
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 0, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v0, v0, s4
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v1, v1, s4
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 0, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v2, v2, s4
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v3, v3, s4
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v10, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v13, v6
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v11, v8
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = udiv <2 x i64> %num, <i64 4096, i64 4096>
   ret <2 x i64> %result
 }
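
(Aside on the pow2k output above: the new CHECK lines take the same
magic-number path instead of folding x udiv 4096 to a plain shift. Assuming the
standard scheme, the magic for d = 4096 is 1 << 52 with no post-shift; s4 =
0x100000 is its high 32 bits and the low half is 0, which is why the sequence
multiplies by zero and shifts by 20. A minimal sketch under that reading; the
function name is invented and unsigned __int128 is a GCC/Clang extension.)

#include <stdint.h>

static uint64_t udiv_4096(uint64_t x) {
  /* High 64 bits of x * (1 << 52); identical to x >> 12. */
  unsigned __int128 p = (unsigned __int128)x * ((uint64_t)1 << 52);
  return (uint64_t)(p >> 64);
}
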
@@ -1630,659 +1049,86 @@ define i64 @v_udiv_i64_oddk_denom(i64 %num) {
 ; CHECK-LABEL: v_udiv_i64_oddk_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x12d8fb
-; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v3, 0
-; CHECK-NEXT:    s_mov_b32 s6, 0xffed2705
-; CHECK-NEXT:    s_mov_b32 s7, 0x12d8fb
-; CHECK-NEXT:    s_bfe_i32 s4, -1, 0x10000
-; CHECK-NEXT:    s_bfe_i32 s5, -1, 0x10000
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
-; CHECK-NEXT:    v_mov_b32_e32 v3, s4
-; CHECK-NEXT:    v_mov_b32_e32 v4, s5
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
-; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
-; CHECK-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v2
-; CHECK-NEXT:    v_trunc_f32_e32 v5, v5
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v5
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT:    v_mul_lo_u32 v6, s6, v5
-; CHECK-NEXT:    v_mul_lo_u32 v7, s6, v2
-; CHECK-NEXT:    v_mul_lo_u32 v8, -1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v9, s6, v2
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v7
-; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v7
-; CHECK-NEXT:    v_mul_hi_u32 v7, v5, v7
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
-; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v6
-; CHECK-NEXT:    v_mul_lo_u32 v11, v5, v6
-; CHECK-NEXT:    v_mul_hi_u32 v12, v2, v6
-; CHECK-NEXT:    v_mul_hi_u32 v6, v5, v6
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v11, v10
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; CHECK-NEXT:    v_addc_u32_e64 v7, s[4:5], v5, v6, vcc
-; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
-; CHECK-NEXT:    v_mul_lo_u32 v6, s6, v2
-; CHECK-NEXT:    v_mul_lo_u32 v8, -1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v9, s6, v2
-; CHECK-NEXT:    v_mul_lo_u32 v10, s6, v7
-; CHECK-NEXT:    v_mul_lo_u32 v11, v7, v6
-; CHECK-NEXT:    v_mul_hi_u32 v12, v2, v6
-; CHECK-NEXT:    v_mul_hi_u32 v6, v7, v6
-; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
-; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v9
-; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v8
-; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v8
-; CHECK-NEXT:    v_mul_hi_u32 v13, v2, v8
-; CHECK-NEXT:    v_mul_hi_u32 v7, v7, v8
-; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v11, v9
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
-; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v10, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
-; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v12
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
-; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v13
-; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
-; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v11
-; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
-; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
-; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v8
-; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v5, v7, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
-; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
-; CHECK-NEXT:    v_mul_lo_u32 v6, v1, v2
-; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v2
-; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
-; CHECK-NEXT:    v_mul_lo_u32 v8, v0, v5
-; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v5
-; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v5
-; CHECK-NEXT:    v_mul_hi_u32 v5, v1, v5
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
-; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v9, v2
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; CHECK-NEXT:    v_mul_lo_u32 v7, s7, v2
-; CHECK-NEXT:    v_mul_lo_u32 v8, 0, v2
-; CHECK-NEXT:    v_mul_hi_u32 v9, s7, v2
+; CHECK-NEXT:    s_mov_b32 s4, 0x1fb03c31
+; CHECK-NEXT:    s_mov_b32 s5, 0xd9528440
+; CHECK-NEXT:    v_mul_lo_u32 v2, v1, s4
+; CHECK-NEXT:    v_mul_lo_u32 v3, v0, s5
+; CHECK-NEXT:    v_mul_hi_u32 v4, v0, s4
+; CHECK-NEXT:    v_mul_lo_u32 v5, v1, s5
+; CHECK-NEXT:    v_mul_hi_u32 v6, v1, s4
+; CHECK-NEXT:    v_mul_hi_u32 v0, v0, s5
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
 ; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v5
-; CHECK-NEXT:    v_add_i32_e32 v10, vcc, 1, v2
-; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, 0, v5, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v10
-; CHECK-NEXT:    v_addc_u32_e32 v12, vcc, 0, v11, vcc
-; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
-; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
-; CHECK-NEXT:    v_subb_u32_e64 v7, s[4:5], v1, v6, vcc
-; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v6
-; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v7
-; CHECK-NEXT:    v_cndmask_b32_e64 v3, v3, v6, s[4:5]
-; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s7, v0
-; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s7, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
-; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
-; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v10, v8, vcc
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v11, v12, vcc
-; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v4
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_mul_hi_u32 v1, v1, s5
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT:    v_lshr_b64 v[0:1], v[0:1], 20
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = udiv i64 %num, 1235195
   ret i64 %result
 }
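
(Aside: in the v_udiv_i64_oddk_denom output above, s5:s4 appear to form the
64-bit magic 0xd9528440_1fb03c31, the v_mul_* sequence builds the high 64 bits
of the 128-bit product, and v_lshr_b64 applies the post-shift of 20. A minimal
C sketch under that reading; the name is invented and unsigned __int128 is a
GCC/Clang extension.)

#include <stdint.h>

static uint64_t udiv_i64_1235195(uint64_t x) {
  /* mulhu64(x, magic) >> 20 */
  unsigned __int128 p = (unsigned __int128)x * 0xd95284401fb03c31ull;
  return (uint64_t)(p >> 64) >> 20;
}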
 
 define <2 x i64> @v_udiv_v2i64_oddk_denom(<2 x i64> %num) {
-; GISEL-LABEL: v_udiv_v2i64_oddk_denom:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s12, 0x12d8fb
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s12
-; GISEL-NEXT:    s_sub_u32 s8, 0, s12
-; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
-; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
-; GISEL-NEXT:    v_mov_b32_e32 v6, v4
-; GISEL-NEXT:    s_and_b32 s4, s4, 1
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
-; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v6
-; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
-; GISEL-NEXT:    s_subb_u32 s9, 0, 0
-; GISEL-NEXT:    s_bfe_i32 s10, -1, 0x10000
-; GISEL-NEXT:    s_bfe_i32 s11, -1, 0x10000
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; GISEL-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
-; GISEL-NEXT:    s_sub_u32 s13, 0, s12
-; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
-; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
-; GISEL-NEXT:    v_trunc_f32_e32 v6, v6
-; GISEL-NEXT:    s_and_b32 s4, s4, 1
-; GISEL-NEXT:    v_trunc_f32_e32 v7, v7
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
-; GISEL-NEXT:    s_subb_u32 s6, 0, 0
-; GISEL-NEXT:    v_mul_lo_u32 v8, s13, v6
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v7
-; GISEL-NEXT:    v_mul_lo_u32 v10, s13, v4
-; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
-; GISEL-NEXT:    v_mul_hi_u32 v12, s13, v4
-; GISEL-NEXT:    v_mul_lo_u32 v13, s8, v5
-; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
-; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
-; GISEL-NEXT:    v_mul_lo_u32 v11, v6, v10
-; GISEL-NEXT:    v_mul_hi_u32 v16, v4, v10
-; GISEL-NEXT:    v_mul_hi_u32 v10, v6, v10
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
-; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v13
-; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v13
-; GISEL-NEXT:    v_mul_hi_u32 v13, v7, v13
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
-; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
-; GISEL-NEXT:    v_mul_lo_u32 v15, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v6, v8
-; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
-; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v9
-; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v9
-; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
-; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
-; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v11, s13, v4
-; GISEL-NEXT:    v_mul_lo_u32 v12, s6, v4
-; GISEL-NEXT:    v_mul_hi_u32 v14, s13, v4
-; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
-; GISEL-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
-; GISEL-NEXT:    v_mul_lo_u32 v15, s8, v5
-; GISEL-NEXT:    v_mul_lo_u32 v16, s9, v5
-; GISEL-NEXT:    v_mul_hi_u32 v17, s8, v5
-; GISEL-NEXT:    v_mul_lo_u32 v18, s8, v13
-; GISEL-NEXT:    v_mul_lo_u32 v19, v13, v15
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v18
-; GISEL-NEXT:    v_mul_hi_u32 v18, v5, v15
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v17
-; GISEL-NEXT:    v_mul_lo_u32 v17, v5, v16
-; GISEL-NEXT:    v_add_i32_e64 v17, s[6:7], v19, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v17, s[6:7], v17, v18
-; GISEL-NEXT:    v_mul_lo_u32 v17, s13, v10
-; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v11
-; GISEL-NEXT:    v_add_i32_e64 v12, s[8:9], v12, v17
-; GISEL-NEXT:    v_mul_hi_u32 v17, v4, v11
-; GISEL-NEXT:    v_add_i32_e64 v12, s[8:9], v12, v14
-; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v12
-; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v18, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v17
-; GISEL-NEXT:    v_mov_b32_e32 v14, s10
-; GISEL-NEXT:    v_mov_b32_e32 v17, s11
-; GISEL-NEXT:    s_bfe_i32 s13, -1, 0x10000
-; GISEL-NEXT:    s_bfe_i32 s14, -1, 0x10000
-; GISEL-NEXT:    v_add_i32_e64 v6, s[10:11], v6, v8
-; GISEL-NEXT:    v_mov_b32_e32 v8, s13
-; GISEL-NEXT:    v_add_i32_e64 v7, s[10:11], v7, v9
-; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v11
-; GISEL-NEXT:    v_mul_hi_u32 v11, v13, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v15, s[8:9], v18, v15
-; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v12
-; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v12
-; GISEL-NEXT:    v_mul_hi_u32 v12, v4, v12
-; GISEL-NEXT:    v_add_i32_e64 v9, s[8:9], v18, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v9, s[8:9], v9, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v12, s[8:9], v18, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v18, s[6:7], v19, v18
-; GISEL-NEXT:    v_mul_lo_u32 v19, v13, v16
-; GISEL-NEXT:    v_mul_hi_u32 v13, v13, v16
-; GISEL-NEXT:    v_mul_hi_u32 v16, v5, v16
-; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v19, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v19, v16
-; GISEL-NEXT:    v_mov_b32_e32 v19, s14
-; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v12, v15
-; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v18
-; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v13, v15
-; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
-; GISEL-NEXT:    v_addc_u32_e64 v7, vcc, v7, v12, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
-; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v4
-; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
-; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v5
-; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
-; GISEL-NEXT:    v_mul_lo_u32 v13, v2, v6
-; GISEL-NEXT:    v_mul_lo_u32 v15, v3, v6
-; GISEL-NEXT:    v_mul_hi_u32 v16, v2, v6
-; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
-; GISEL-NEXT:    v_mul_lo_u32 v18, v0, v7
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v7
-; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v7
-; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
-; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v15, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v11, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v15, v10
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v18, v16
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
-; GISEL-NEXT:    v_mul_lo_u32 v10, s12, v4
-; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v4
-; GISEL-NEXT:    v_mul_hi_u32 v15, s12, v4
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; GISEL-NEXT:    v_mul_lo_u32 v12, s12, v5
-; GISEL-NEXT:    v_mul_lo_u32 v16, 0, v5
-; GISEL-NEXT:    v_mul_hi_u32 v18, s12, v5
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
-; GISEL-NEXT:    v_mul_lo_u32 v9, s12, v6
-; GISEL-NEXT:    v_mul_lo_u32 v11, s12, v7
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v4
-; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v6, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, 1, v5
-; GISEL-NEXT:    v_addc_u32_e32 v18, vcc, 0, v7, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
-; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], v3, v9, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v9
-; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s12, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v12
-; GISEL-NEXT:    v_subb_u32_e64 v12, s[6:7], v1, v11, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v11
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v10
-; GISEL-NEXT:    v_add_i32_e64 v10, s[8:9], 1, v13
-; GISEL-NEXT:    v_addc_u32_e64 v11, s[8:9], 0, v16, s[8:9]
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s[6:7]
-; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], s12, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[6:7]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, v14, v9, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], 1, v15
-; GISEL-NEXT:    v_addc_u32_e64 v14, s[6:7], 0, v18, s[6:7]
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
-; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, s12, v2
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s12, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
-; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, s12, v0
-; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s12, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v19, v2, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v17, v0, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v13, v10, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, v15, v12, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v16, v11, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v4, v1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v18, v14, s[4:5]
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_udiv_v2i64_oddk_denom:
-; CGP:       ; %bb.0:
-; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
-; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
-; CGP-NEXT:    s_mov_b32 s8, 0xffed2705
-; CGP-NEXT:    s_mov_b32 s12, 0x12d8fb
-; CGP-NEXT:    s_bfe_i32 s10, -1, 0x10000
-; CGP-NEXT:    s_bfe_i32 s11, -1, 0x10000
-; CGP-NEXT:    s_bfe_i32 s13, -1, 0x10000
-; CGP-NEXT:    s_bfe_i32 s14, -1, 0x10000
-; CGP-NEXT:    v_mov_b32_e32 v6, v4
-; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
-; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v6
-; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; CGP-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
-; CGP-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
-; CGP-NEXT:    v_trunc_f32_e32 v6, v6
-; CGP-NEXT:    v_trunc_f32_e32 v7, v7
-; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
-; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; CGP-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
-; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; CGP-NEXT:    v_mul_lo_u32 v8, s8, v6
-; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; CGP-NEXT:    v_mul_lo_u32 v9, s8, v7
-; CGP-NEXT:    v_mul_lo_u32 v10, s8, v4
-; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
-; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
-; CGP-NEXT:    v_mul_lo_u32 v13, s8, v5
-; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
-; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
-; CGP-NEXT:    v_mul_lo_u32 v11, v6, v10
-; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
-; CGP-NEXT:    v_mul_hi_u32 v10, v6, v10
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
-; CGP-NEXT:    v_mul_lo_u32 v14, v7, v13
-; CGP-NEXT:    v_mul_hi_u32 v17, v5, v13
-; CGP-NEXT:    v_mul_hi_u32 v13, v7, v13
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
-; CGP-NEXT:    v_mul_lo_u32 v12, v4, v8
-; CGP-NEXT:    v_mul_lo_u32 v15, v6, v8
-; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
-; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
-; CGP-NEXT:    v_mul_lo_u32 v19, v5, v9
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
-; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
-; CGP-NEXT:    v_mul_lo_u32 v14, v7, v9
-; CGP-NEXT:    v_mul_hi_u32 v17, v5, v9
-; CGP-NEXT:    v_mul_hi_u32 v9, v7, v9
-; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
-; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
-; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
-; CGP-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
-; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
-; CGP-NEXT:    v_mul_lo_u32 v11, s8, v4
-; CGP-NEXT:    v_mul_lo_u32 v12, -1, v4
-; CGP-NEXT:    v_mul_hi_u32 v14, s8, v4
-; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
-; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
-; CGP-NEXT:    v_mul_lo_u32 v15, s8, v5
-; CGP-NEXT:    v_mul_lo_u32 v16, -1, v5
-; CGP-NEXT:    v_mul_hi_u32 v17, s8, v5
-; CGP-NEXT:    v_mul_lo_u32 v18, s8, v13
-; CGP-NEXT:    v_mul_lo_u32 v19, v13, v15
-; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v18
-; CGP-NEXT:    v_mul_hi_u32 v18, v5, v15
-; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v17
-; CGP-NEXT:    v_mul_lo_u32 v17, v5, v16
-; CGP-NEXT:    v_add_i32_e64 v17, s[6:7], v19, v17
-; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
-; CGP-NEXT:    v_add_i32_e64 v17, s[6:7], v17, v18
-; CGP-NEXT:    v_mul_lo_u32 v17, s8, v10
-; CGP-NEXT:    v_mul_lo_u32 v18, v10, v11
-; CGP-NEXT:    v_add_i32_e64 v12, s[8:9], v12, v17
-; CGP-NEXT:    v_mul_hi_u32 v17, v4, v11
-; CGP-NEXT:    v_add_i32_e64 v12, s[8:9], v12, v14
-; CGP-NEXT:    v_mul_lo_u32 v14, v4, v12
-; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v18, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
-; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v17
-; CGP-NEXT:    v_mov_b32_e32 v14, s10
-; CGP-NEXT:    v_mov_b32_e32 v17, s11
-; CGP-NEXT:    v_add_i32_e64 v6, s[10:11], v6, v8
-; CGP-NEXT:    v_mov_b32_e32 v8, s13
-; CGP-NEXT:    v_add_i32_e64 v7, s[10:11], v7, v9
-; CGP-NEXT:    v_mul_hi_u32 v9, v10, v11
-; CGP-NEXT:    v_mul_hi_u32 v11, v13, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
-; CGP-NEXT:    v_add_i32_e64 v15, s[8:9], v18, v15
-; CGP-NEXT:    v_mul_lo_u32 v18, v10, v12
-; CGP-NEXT:    v_mul_hi_u32 v10, v10, v12
-; CGP-NEXT:    v_mul_hi_u32 v12, v4, v12
-; CGP-NEXT:    v_add_i32_e64 v9, s[8:9], v18, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
-; CGP-NEXT:    v_add_i32_e64 v9, s[8:9], v9, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[8:9]
-; CGP-NEXT:    v_add_i32_e64 v12, s[8:9], v18, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; CGP-NEXT:    v_add_i32_e64 v18, s[6:7], v19, v18
-; CGP-NEXT:    v_mul_lo_u32 v19, v13, v16
-; CGP-NEXT:    v_mul_hi_u32 v13, v13, v16
-; CGP-NEXT:    v_mul_hi_u32 v16, v5, v16
-; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v19, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
-; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
-; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v19, v16
-; CGP-NEXT:    v_mov_b32_e32 v19, s14
-; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v15
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[6:7]
-; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v18
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v12, v15
-; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v18
-; CGP-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
-; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v13, v15
-; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
-; CGP-NEXT:    v_addc_u32_e64 v7, vcc, v7, v12, s[4:5]
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
-; CGP-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
-; CGP-NEXT:    v_mul_lo_u32 v9, v3, v4
-; CGP-NEXT:    v_mul_hi_u32 v10, v2, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
-; CGP-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
-; CGP-NEXT:    v_mul_lo_u32 v11, v1, v5
-; CGP-NEXT:    v_mul_hi_u32 v12, v0, v5
-; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
-; CGP-NEXT:    v_mul_lo_u32 v13, v2, v6
-; CGP-NEXT:    v_mul_lo_u32 v15, v3, v6
-; CGP-NEXT:    v_mul_hi_u32 v16, v2, v6
-; CGP-NEXT:    v_mul_hi_u32 v6, v3, v6
-; CGP-NEXT:    v_mul_lo_u32 v18, v0, v7
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT:    v_mul_lo_u32 v11, v1, v7
-; CGP-NEXT:    v_mul_hi_u32 v12, v0, v7
-; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
-; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v15, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v11, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
-; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v15, v10
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v18, v16
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT:    v_mul_lo_u32 v10, s12, v4
-; CGP-NEXT:    v_mul_lo_u32 v13, 0, v4
-; CGP-NEXT:    v_mul_hi_u32 v15, s12, v4
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT:    v_mul_lo_u32 v12, s12, v5
-; CGP-NEXT:    v_mul_lo_u32 v16, 0, v5
-; CGP-NEXT:    v_mul_hi_u32 v18, s12, v5
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
-; CGP-NEXT:    v_mul_lo_u32 v9, s12, v6
-; CGP-NEXT:    v_mul_lo_u32 v11, s12, v7
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, 1, v4
-; CGP-NEXT:    v_addc_u32_e32 v16, vcc, 0, v6, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
-; CGP-NEXT:    v_add_i32_e32 v15, vcc, 1, v5
-; CGP-NEXT:    v_addc_u32_e32 v18, vcc, 0, v7, vcc
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
-; CGP-NEXT:    v_subb_u32_e64 v10, s[4:5], v3, v9, vcc
-; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v9
-; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s12, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v12
-; CGP-NEXT:    v_subb_u32_e64 v12, s[6:7], v1, v11, s[4:5]
-; CGP-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v11
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v10
-; CGP-NEXT:    v_add_i32_e64 v10, s[8:9], 1, v13
-; CGP-NEXT:    v_addc_u32_e64 v11, s[8:9], 0, v16, s[8:9]
-; CGP-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s[6:7]
-; CGP-NEXT:    v_cmp_le_u32_e64 s[6:7], s12, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[6:7]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v9, v14, v9, s[6:7]
-; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], 1, v15
-; CGP-NEXT:    v_addc_u32_e64 v14, s[6:7], 0, v18, s[6:7]
-; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; CGP-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
-; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, s12, v2
-; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s12, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
-; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s12, v0
-; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s12, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v19, v2, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v17, v0, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v13, v10, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v0, v15, v12, s[4:5]
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v16, v11, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v1, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v1, v18, v14, s[4:5]
-; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
-; CGP-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
-; CGP-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-LABEL: v_udiv_v2i64_oddk_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 s4, 0x1fb03c31
+; CHECK-NEXT:    s_mov_b32 s5, 0xd9528440
+; CHECK-NEXT:    v_mul_lo_u32 v4, v1, s4
+; CHECK-NEXT:    v_mul_lo_u32 v5, v0, s5
+; CHECK-NEXT:    v_mul_hi_u32 v6, v0, s4
+; CHECK-NEXT:    v_mul_lo_u32 v7, v1, s5
+; CHECK-NEXT:    v_mul_hi_u32 v8, v1, s4
+; CHECK-NEXT:    v_mul_hi_u32 v0, v0, s5
+; CHECK-NEXT:    v_mul_hi_u32 v1, v1, s5
+; CHECK-NEXT:    v_mul_lo_u32 v9, v3, s4
+; CHECK-NEXT:    v_mul_lo_u32 v10, v2, s5
+; CHECK-NEXT:    v_mul_hi_u32 v11, v2, s4
+; CHECK-NEXT:    v_mul_lo_u32 v12, v3, s5
+; CHECK-NEXT:    v_mul_hi_u32 v13, v3, s4
+; CHECK-NEXT:    v_mul_hi_u32 v2, v2, s5
+; CHECK-NEXT:    v_mul_hi_u32 v3, v3, s5
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v12, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v8, v6
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v10, v7
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v13, v9
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; CHECK-NEXT:    v_lshr_b64 v[0:1], v[0:1], 20
+; CHECK-NEXT:    v_lshr_b64 v[2:3], v[2:3], 20
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = udiv <2 x i64> %num, <i64 1235195, i64 1235195>
   ret <2 x i64> %result
 }
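
For reference, the new CHECK lines above encode the magic-number expansion the
combine produces for this test: the udiv by the odd constant 1235195 (0x12d8fb)
becomes a full 64x64->128 unsigned multiply by {s5,s4} = 0xd95284401fb03c31,
which is ceil(2^84 / 1235195), followed by a 20-bit right shift of the high
half of the product (a total shift of 84). A minimal standalone C++ sketch of
that identity, not taken from the patch and assuming a compiler that provides
unsigned __int128 (GCC/Clang):

#include <cassert>
#include <cstdint>

int main() {
  // Magic constant from the CHECK lines above: {s5,s4} = 0xd9528440'1fb03c31,
  // i.e. ceil(2^84 / 1235195).
  const unsigned __int128 Magic =
      ((unsigned __int128)0xd9528440u << 32) | 0x1fb03c31u;
  const uint64_t Divisor = 1235195; // 0x12d8fb, the odd denominator in the test.
  const uint64_t Tests[] = {0, 1, Divisor - 1, Divisor,
                            0x123456789abcdef0, ~uint64_t(0)};
  for (uint64_t X : Tests) {
    // High 64 bits of the 128-bit product (>> 64), then a further >> 20,
    // matching the v_mul_hi/v_lshr_b64 sequence in the generated code.
    uint64_t Hi = (uint64_t)(((unsigned __int128)X * Magic) >> 64);
    assert((Hi >> 20) == X / Divisor);
  }
  return 0;
}

Because the divisor is odd and the magic value fits in 64 bits, no post-multiply
add/subtract fixup is needed, which is why the new output is just the multiply
chain plus the two v_lshr_b64 instructions, a large win over the old
iterative-refinement sequence deleted above.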
