[llvm] 8bfc0e0 - [GlobalISel] Port the udiv -> mul by constant combine.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 7 11:37:27 PDT 2021
Author: Amara Emerson
Date: 2021-10-07T11:37:17-07:00
New Revision: 8bfc0e06dc85663ba3317da0c7b472260bf27948
URL: https://github.com/llvm/llvm-project/commit/8bfc0e06dc85663ba3317da0c7b472260bf27948
DIFF: https://github.com/llvm/llvm-project/commit/8bfc0e06dc85663ba3317da0c7b472260bf27948.diff
LOG: [GlobalISel] Port the udiv -> mul by constant combine.
This is a straight port from the equivalent DAG combine.
Differential Revision: https://reviews.llvm.org/D110890
Added:
llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/CodeGen/GlobalISel/Utils.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/lib/CodeGen/GlobalISel/Utils.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 987e930e6d11a..a8d06acd186f8 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -602,6 +602,14 @@ class CombinerHelper {
/// feeding a G_AND instruction \p MI.
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo);
+ /// Given a G_UDIV \p MI expressing a divide by constant, return an
+ /// expression that implements it by multiplying by a magic number.
+ /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
+ MachineInstr *buildUDivUsingMul(MachineInstr &MI);
+ /// Combine G_UDIV by constant into a multiply by magic constant.
+ bool matchUDivByConst(MachineInstr &MI);
+ void applyUDivByConst(MachineInstr &MI);
+
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.
bool tryCombine(MachineInstr &MI);
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 39a5ee71c7102..d0d6ca17b9483 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -397,6 +397,11 @@ bool isBuildVectorAllOnes(const MachineInstr &MI,
Optional<RegOrConstant> getVectorSplat(const MachineInstr &MI,
const MachineRegisterInfo &MRI);
+/// Determines if \p MI defines a constant integer or a build vector of
+/// constant integers. Treats undef values as constants.
+bool isConstantOrConstantVector(MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
+
/// Determines if \p MI defines a constant integer or a splat vector of
/// constant integers.
/// \returns the scalar constant or None.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 52caf3f9ee609..17256d3bc95dd 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -694,6 +694,15 @@ def bitfield_extract_from_shr : GICombineRule<
def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
bitfield_extract_from_and,
bitfield_extract_from_shr]>;
+
+def udiv_by_const : GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_UDIV):$root,
+ [{ return Helper.matchUDivByConst(*${root}); }]),
+ (apply [{ Helper.applyUDivByConst(*${root}); }])>;
+
+def intdiv_combines : GICombineGroup<[udiv_by_const]>;
+
def reassoc_ptradd : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (wip_match_opcode G_PTR_ADD):$root,
@@ -761,7 +770,8 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
truncstore_merge, div_rem_to_divrem, funnel_shift_combines,
- form_bitfield_extract, constant_fold, fabs_fneg_fold]>;
+ form_bitfield_extract, constant_fold, fabs_fneg_fold,
+ intdiv_combines]>;
// A combine group used for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 29a16bc765e8e..e60f2c34b2d2b 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/MathExtras.h"
#include <tuple>
@@ -4422,6 +4423,162 @@ bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
return true;
}
+MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UDIV);
+ auto &UDiv = cast<GenericMachineInstr>(MI);
+ Register Dst = UDiv.getReg(0);
+ Register LHS = UDiv.getReg(1);
+ Register RHS = UDiv.getReg(2);
+ LLT Ty = MRI.getType(Dst);
+ LLT ScalarTy = Ty.getScalarType();
+ const unsigned EltBits = ScalarTy.getScalarSizeInBits();
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
+ auto &MIB = Builder;
+ MIB.setInstrAndDebugLoc(MI);
+
+ bool UseNPQ = false;
+ SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
+
+ auto BuildUDIVPattern = [&](const Constant *C) {
+ auto *CI = cast<ConstantInt>(C);
+ const APInt &Divisor = CI->getValue();
+ UnsignedDivisonByConstantInfo magics =
+ UnsignedDivisonByConstantInfo::get(Divisor);
+ unsigned PreShift = 0, PostShift = 0;
+
+ // If the divisor is even, we can avoid using the expensive fixup by
+ // shifting the divided value upfront.
+ if (magics.IsAdd != 0 && !Divisor[0]) {
+ PreShift = Divisor.countTrailingZeros();
+ // Get magic number for the shifted divisor.
+ magics =
+ UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
+ assert(magics.IsAdd == 0 && "Should use cheap fixup now");
+ }
+
+ APInt Magic = magics.Magic;
+
+ unsigned SelNPQ;
+ if (magics.IsAdd == 0 || Divisor.isOneValue()) {
+ assert(magics.ShiftAmount < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ PostShift = magics.ShiftAmount;
+ SelNPQ = false;
+ } else {
+ PostShift = magics.ShiftAmount - 1;
+ SelNPQ = true;
+ }
+
+ PreShifts.push_back(
+ MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
+ MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
+ NPQFactors.push_back(
+ MIB.buildConstant(ScalarTy,
+ SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
+ : APInt::getZero(EltBits))
+ .getReg(0));
+ PostShifts.push_back(
+ MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
+ UseNPQ |= SelNPQ;
+ return true;
+ };
+
+ // Collect the shifts/magic values from each element.
+ bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
+ (void)Matched;
+ assert(Matched && "Expected unary predicate match to succeed");
+
+ Register PreShift, PostShift, MagicFactor, NPQFactor;
+ auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
+ if (RHSDef) {
+ PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
+ MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
+ NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
+ PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
+ } else {
+ assert(MRI.getType(RHS).isScalar() &&
+ "Non-build_vector operation should have been a scalar");
+ PreShift = PreShifts[0];
+ MagicFactor = MagicFactors[0];
+ PostShift = PostShifts[0];
+ }
+
+ Register Q = LHS;
+ Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
+
+ // Multiply the numerator (operand 0) by the magic value.
+ Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
+
+ if (UseNPQ) {
+ Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
+
+ // For vectors we might have a mix of non-NPQ/NPQ paths, so use
+ // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
+ if (Ty.isVector())
+ NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
+ else
+ NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
+
+ Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
+ }
+
+ Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
+ auto One = MIB.buildConstant(Ty, 1);
+ auto IsOne = MIB.buildICmp(
+ CmpInst::Predicate::ICMP_EQ,
+ Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
+ return MIB.buildSelect(Ty, IsOne, LHS, Q);
+}
+
+bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UDIV);
+ Register Dst = MI.getOperand(0).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ auto *RHSDef = MRI.getVRegDef(RHS);
+ if (!isConstantOrConstantVector(*RHSDef, MRI))
+ return false;
+
+ auto &MF = *MI.getMF();
+ AttributeList Attr = MF.getFunction().getAttributes();
+ const auto &TLI = getTargetLowering();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
+ return false;
+
+ // Don't do this for minsize because the instruction sequence is usually
+ // larger.
+ if (MF.getFunction().hasMinSize())
+ return false;
+
+ // Don't do this if the types are not going to be legal.
+ if (LI) {
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
+ return false;
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
+ return false;
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_ICMP,
+ {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
+ DstTy}}))
+ return false;
+ }
+
+ auto CheckEltValue = [&](const Constant *C) {
+ if (auto *CI = dyn_cast_or_null<ConstantInt>(C))
+ return !CI->isZero();
+ return false;
+ };
+ return matchUnaryPredicate(MRI, RHS, CheckEltValue);
+}
+
+void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
+ auto *NewMI = buildUDivUsingMul(MI);
+ replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 4223a0d1d73d8..f01df66249e7f 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1016,6 +1016,23 @@ Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
return RegOrConstant(Reg);
}
+bool llvm::isConstantOrConstantVector(MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ Register Def = MI.getOperand(0).getReg();
+ if (auto C = getIConstantVRegValWithLookThrough(Def, MRI))
+ return true;
+ GBuildVector *BV = dyn_cast<GBuildVector>(&MI);
+ if (!BV)
+ return false;
+ for (unsigned SrcIdx = 0; SrcIdx < BV->getNumSources(); ++SrcIdx) {
+ if (getIConstantVRegValWithLookThrough(BV->getSourceReg(SrcIdx), MRI) ||
+ getOpcodeDef<GImplicitDef>(BV->getSourceReg(SrcIdx), MRI))
+ continue;
+ return false;
+ }
+ return true;
+}
+
Optional<APInt>
llvm::isConstantOrConstantSplatVector(MachineInstr &MI,
const MachineRegisterInfo &MRI) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
new file mode 100644
index 0000000000000..9b8597943f3de
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
@@ -0,0 +1,287 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefixes=SDAG
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=GISEL
+
+; These tests are taken from the combine-udiv.ll in X86.
+define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
+; SDAG-LABEL: combine_vec_udiv_uniform:
+; SDAG: // %bb.0:
+; SDAG-NEXT: mov w8, #25645
+; SDAG-NEXT: dup v1.8h, w8
+; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h
+; SDAG-NEXT: umull v1.4s, v0.4h, v1.4h
+; SDAG-NEXT: uzp2 v1.8h, v1.8h, v2.8h
+; SDAG-NEXT: sub v0.8h, v0.8h, v1.8h
+; SDAG-NEXT: usra v1.8h, v0.8h, #1
+; SDAG-NEXT: ushr v0.8h, v1.8h, #4
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: combine_vec_udiv_uniform:
+; GISEL: // %bb.0:
+; GISEL-NEXT: adrp x8, .LCPI0_1
+; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI0_1]
+; GISEL-NEXT: adrp x8, .LCPI0_0
+; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI0_0]
+; GISEL-NEXT: umull2 v3.4s, v0.8h, v1.8h
+; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
+; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
+; GISEL-NEXT: sub v0.8h, v0.8h, v1.8h
+; GISEL-NEXT: umull2 v3.4s, v0.8h, v2.8h
+; GISEL-NEXT: umull v0.4s, v0.4h, v2.4h
+; GISEL-NEXT: uzp2 v0.8h, v0.8h, v3.8h
+; GISEL-NEXT: add v0.8h, v0.8h, v1.8h
+; GISEL-NEXT: ushr v0.8h, v0.8h, #4
+; GISEL-NEXT: ret
+ %1 = udiv <8 x i16> %x, <i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23>
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
+; SDAG-LABEL: combine_vec_udiv_nonuniform:
+; SDAG: // %bb.0:
+; SDAG-NEXT: adrp x8, .LCPI1_0
+; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
+; SDAG-NEXT: adrp x8, .LCPI1_1
+; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_1]
+; SDAG-NEXT: adrp x8, .LCPI1_2
+; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI1_2]
+; SDAG-NEXT: ushl v1.8h, v0.8h, v1.8h
+; SDAG-NEXT: umull2 v4.4s, v1.8h, v2.8h
+; SDAG-NEXT: umull v1.4s, v1.4h, v2.4h
+; SDAG-NEXT: adrp x8, .LCPI1_3
+; SDAG-NEXT: uzp2 v1.8h, v1.8h, v4.8h
+; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_3]
+; SDAG-NEXT: sub v0.8h, v0.8h, v1.8h
+; SDAG-NEXT: umull2 v4.4s, v0.8h, v3.8h
+; SDAG-NEXT: umull v0.4s, v0.4h, v3.4h
+; SDAG-NEXT: uzp2 v0.8h, v0.8h, v4.8h
+; SDAG-NEXT: add v0.8h, v0.8h, v1.8h
+; SDAG-NEXT: ushl v0.8h, v0.8h, v2.8h
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: combine_vec_udiv_nonuniform:
+; GISEL: // %bb.0:
+; GISEL-NEXT: adrp x8, .LCPI1_5
+; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI1_5]
+; GISEL-NEXT: adrp x8, .LCPI1_4
+; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_4]
+; GISEL-NEXT: adrp x8, .LCPI1_3
+; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_3]
+; GISEL-NEXT: adrp x8, .LCPI1_1
+; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
+; GISEL-NEXT: adrp x8, .LCPI1_0
+; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI1_0]
+; GISEL-NEXT: adrp x8, .LCPI1_2
+; GISEL-NEXT: neg v2.8h, v2.8h
+; GISEL-NEXT: ldr q6, [x8, :lo12:.LCPI1_2]
+; GISEL-NEXT: ushl v2.8h, v0.8h, v2.8h
+; GISEL-NEXT: cmeq v1.8h, v1.8h, v5.8h
+; GISEL-NEXT: umull2 v5.4s, v2.8h, v3.8h
+; GISEL-NEXT: umull v2.4s, v2.4h, v3.4h
+; GISEL-NEXT: uzp2 v2.8h, v2.8h, v5.8h
+; GISEL-NEXT: sub v3.8h, v0.8h, v2.8h
+; GISEL-NEXT: umull2 v5.4s, v3.8h, v6.8h
+; GISEL-NEXT: umull v3.4s, v3.4h, v6.4h
+; GISEL-NEXT: uzp2 v3.8h, v3.8h, v5.8h
+; GISEL-NEXT: neg v4.8h, v4.8h
+; GISEL-NEXT: shl v1.8h, v1.8h, #15
+; GISEL-NEXT: add v2.8h, v3.8h, v2.8h
+; GISEL-NEXT: ushl v2.8h, v2.8h, v4.8h
+; GISEL-NEXT: sshr v1.8h, v1.8h, #15
+; GISEL-NEXT: bif v0.16b, v2.16b, v1.16b
+; GISEL-NEXT: ret
+ %1 = udiv <8 x i16> %x, <i16 23, i16 34, i16 -23, i16 56, i16 128, i16 -1, i16 -256, i16 -32768>
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
+; SDAG-LABEL: combine_vec_udiv_nonuniform2:
+; SDAG: // %bb.0:
+; SDAG-NEXT: adrp x8, .LCPI2_0
+; SDAG-NEXT: adrp x9, .LCPI2_1
+; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
+; SDAG-NEXT: ldr q2, [x9, :lo12:.LCPI2_1]
+; SDAG-NEXT: adrp x8, .LCPI2_2
+; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI2_2]
+; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h
+; SDAG-NEXT: umull2 v1.4s, v0.8h, v2.8h
+; SDAG-NEXT: umull v0.4s, v0.4h, v2.4h
+; SDAG-NEXT: uzp2 v0.8h, v0.8h, v1.8h
+; SDAG-NEXT: ushl v0.8h, v0.8h, v3.8h
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: combine_vec_udiv_nonuniform2:
+; GISEL: // %bb.0:
+; GISEL-NEXT: adrp x8, .LCPI2_4
+; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI2_4]
+; GISEL-NEXT: adrp x8, .LCPI2_3
+; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_3]
+; GISEL-NEXT: adrp x8, .LCPI2_1
+; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI2_1]
+; GISEL-NEXT: adrp x8, .LCPI2_0
+; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI2_0]
+; GISEL-NEXT: adrp x8, .LCPI2_2
+; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI2_2]
+; GISEL-NEXT: neg v2.8h, v2.8h
+; GISEL-NEXT: ushl v2.8h, v0.8h, v2.8h
+; GISEL-NEXT: cmeq v1.8h, v1.8h, v4.8h
+; GISEL-NEXT: umull2 v4.4s, v2.8h, v5.8h
+; GISEL-NEXT: umull v2.4s, v2.4h, v5.4h
+; GISEL-NEXT: neg v3.8h, v3.8h
+; GISEL-NEXT: shl v1.8h, v1.8h, #15
+; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
+; GISEL-NEXT: ushl v2.8h, v2.8h, v3.8h
+; GISEL-NEXT: sshr v1.8h, v1.8h, #15
+; GISEL-NEXT: bif v0.16b, v2.16b, v1.16b
+; GISEL-NEXT: ret
+ %1 = udiv <8 x i16> %x, <i16 -34, i16 35, i16 36, i16 -37, i16 38, i16 -39, i16 40, i16 -41>
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
+; SDAG-LABEL: combine_vec_udiv_nonuniform3:
+; SDAG: // %bb.0:
+; SDAG-NEXT: adrp x8, .LCPI3_0
+; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
+; SDAG-NEXT: adrp x8, .LCPI3_1
+; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI3_1]
+; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h
+; SDAG-NEXT: umull v1.4s, v0.4h, v1.4h
+; SDAG-NEXT: uzp2 v1.8h, v1.8h, v2.8h
+; SDAG-NEXT: sub v0.8h, v0.8h, v1.8h
+; SDAG-NEXT: usra v1.8h, v0.8h, #1
+; SDAG-NEXT: ushl v0.8h, v1.8h, v3.8h
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: combine_vec_udiv_nonuniform3:
+; GISEL: // %bb.0:
+; GISEL-NEXT: adrp x8, .LCPI3_4
+; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_4]
+; GISEL-NEXT: adrp x8, .LCPI3_3
+; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_3]
+; GISEL-NEXT: adrp x8, .LCPI3_2
+; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_2]
+; GISEL-NEXT: adrp x8, .LCPI3_1
+; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI3_1]
+; GISEL-NEXT: adrp x8, .LCPI3_0
+; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI3_0]
+; GISEL-NEXT: umull2 v6.4s, v0.8h, v2.8h
+; GISEL-NEXT: umull v2.4s, v0.4h, v2.4h
+; GISEL-NEXT: uzp2 v2.8h, v2.8h, v6.8h
+; GISEL-NEXT: cmeq v1.8h, v1.8h, v5.8h
+; GISEL-NEXT: sub v5.8h, v0.8h, v2.8h
+; GISEL-NEXT: umull2 v6.4s, v5.8h, v3.8h
+; GISEL-NEXT: umull v3.4s, v5.4h, v3.4h
+; GISEL-NEXT: uzp2 v3.8h, v3.8h, v6.8h
+; GISEL-NEXT: neg v4.8h, v4.8h
+; GISEL-NEXT: shl v1.8h, v1.8h, #15
+; GISEL-NEXT: add v2.8h, v3.8h, v2.8h
+; GISEL-NEXT: ushl v2.8h, v2.8h, v4.8h
+; GISEL-NEXT: sshr v1.8h, v1.8h, #15
+; GISEL-NEXT: bif v0.16b, v2.16b, v1.16b
+; GISEL-NEXT: ret
+ %1 = udiv <8 x i16> %x, <i16 7, i16 23, i16 25, i16 27, i16 31, i16 47, i16 63, i16 127>
+ ret <8 x i16> %1
+}
+
+define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
+; SDAG-LABEL: combine_vec_udiv_nonuniform4:
+; SDAG: // %bb.0:
+; SDAG-NEXT: adrp x8, .LCPI4_0
+; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI4_0]
+; SDAG-NEXT: adrp x8, .LCPI4_1
+; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI4_1]
+; SDAG-NEXT: adrp x8, .LCPI4_2
+; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI4_2]
+; SDAG-NEXT: adrp x8, .LCPI4_3
+; SDAG-NEXT: ldr q4, [x8, :lo12:.LCPI4_3]
+; SDAG-NEXT: umull2 v5.8h, v0.16b, v1.16b
+; SDAG-NEXT: umull v1.8h, v0.8b, v1.8b
+; SDAG-NEXT: uzp2 v1.16b, v1.16b, v5.16b
+; SDAG-NEXT: ushl v1.16b, v1.16b, v2.16b
+; SDAG-NEXT: and v1.16b, v1.16b, v3.16b
+; SDAG-NEXT: and v0.16b, v0.16b, v4.16b
+; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: combine_vec_udiv_nonuniform4:
+; GISEL: // %bb.0:
+; GISEL-NEXT: adrp x8, .LCPI4_3
+; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_3]
+; GISEL-NEXT: adrp x8, .LCPI4_0
+; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
+; GISEL-NEXT: adrp x8, .LCPI4_2
+; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_2]
+; GISEL-NEXT: adrp x8, .LCPI4_1
+; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI4_1]
+; GISEL-NEXT: cmeq v1.16b, v1.16b, v2.16b
+; GISEL-NEXT: umull2 v2.8h, v0.16b, v3.16b
+; GISEL-NEXT: umull v3.8h, v0.8b, v3.8b
+; GISEL-NEXT: neg v4.16b, v4.16b
+; GISEL-NEXT: uzp2 v2.16b, v3.16b, v2.16b
+; GISEL-NEXT: shl v1.16b, v1.16b, #7
+; GISEL-NEXT: ushl v2.16b, v2.16b, v4.16b
+; GISEL-NEXT: sshr v1.16b, v1.16b, #7
+; GISEL-NEXT: bif v0.16b, v2.16b, v1.16b
+; GISEL-NEXT: ret
+ %div = udiv <16 x i8> %x, <i8 -64, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %div
+}
+
+define <8 x i16> @pr38477(<8 x i16> %a0) {
+; SDAG-LABEL: pr38477:
+; SDAG: // %bb.0:
+; SDAG-NEXT: adrp x8, .LCPI5_0
+; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI5_0]
+; SDAG-NEXT: adrp x8, .LCPI5_1
+; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_1]
+; SDAG-NEXT: adrp x8, .LCPI5_2
+; SDAG-NEXT: umull2 v4.4s, v0.8h, v1.8h
+; SDAG-NEXT: umull v1.4s, v0.4h, v1.4h
+; SDAG-NEXT: uzp2 v1.8h, v1.8h, v4.8h
+; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI5_2]
+; SDAG-NEXT: adrp x8, .LCPI5_3
+; SDAG-NEXT: sub v4.8h, v0.8h, v1.8h
+; SDAG-NEXT: umull2 v5.4s, v4.8h, v2.8h
+; SDAG-NEXT: umull v2.4s, v4.4h, v2.4h
+; SDAG-NEXT: ldr q4, [x8, :lo12:.LCPI5_3]
+; SDAG-NEXT: adrp x8, .LCPI5_4
+; SDAG-NEXT: uzp2 v2.8h, v2.8h, v5.8h
+; SDAG-NEXT: ldr q5, [x8, :lo12:.LCPI5_4]
+; SDAG-NEXT: add v1.8h, v2.8h, v1.8h
+; SDAG-NEXT: ushl v1.8h, v1.8h, v3.8h
+; SDAG-NEXT: and v1.16b, v1.16b, v4.16b
+; SDAG-NEXT: and v0.16b, v0.16b, v5.16b
+; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: pr38477:
+; GISEL: // %bb.0:
+; GISEL-NEXT: adrp x8, .LCPI5_4
+; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI5_4]
+; GISEL-NEXT: adrp x8, .LCPI5_3
+; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI5_3]
+; GISEL-NEXT: adrp x8, .LCPI5_2
+; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_2]
+; GISEL-NEXT: adrp x8, .LCPI5_1
+; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI5_1]
+; GISEL-NEXT: adrp x8, .LCPI5_0
+; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI5_0]
+; GISEL-NEXT: umull2 v6.4s, v0.8h, v2.8h
+; GISEL-NEXT: umull v2.4s, v0.4h, v2.4h
+; GISEL-NEXT: uzp2 v2.8h, v2.8h, v6.8h
+; GISEL-NEXT: cmeq v1.8h, v1.8h, v5.8h
+; GISEL-NEXT: sub v5.8h, v0.8h, v2.8h
+; GISEL-NEXT: umull2 v6.4s, v5.8h, v3.8h
+; GISEL-NEXT: umull v3.4s, v5.4h, v3.4h
+; GISEL-NEXT: uzp2 v3.8h, v3.8h, v6.8h
+; GISEL-NEXT: neg v4.8h, v4.8h
+; GISEL-NEXT: shl v1.8h, v1.8h, #15
+; GISEL-NEXT: add v2.8h, v3.8h, v2.8h
+; GISEL-NEXT: ushl v2.8h, v2.8h, v4.8h
+; GISEL-NEXT: sshr v1.8h, v1.8h, #15
+; GISEL-NEXT: bif v0.16b, v2.16b, v1.16b
+; GISEL-NEXT: ret
+ %1 = udiv <8 x i16> %a0, <i16 1, i16 119, i16 73, i16 -111, i16 -3, i16 118, i16 32, i16 31>
+ ret <8 x i16> %1
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.mir
new file mode 100644
index 0000000000000..6ebaff1dfaaed
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.mir
@@ -0,0 +1,353 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
+---
+name: udiv_by_scalar_const
+body: |
+ bb.1:
+ liveins: $w0
+ ; CHECK-LABEL: name: udiv_by_scalar_const
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 818089009
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+ ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UMULH]], [[C2]](s32)
+ ; CHECK-NEXT: $w0 = COPY [[LSHR1]](s32)
+ %0:_(s32) = COPY $w0
+ %cst:_(s32) = G_CONSTANT i32 42
+ %2:_(s32) = G_UDIV %0(s32), %cst(s32)
+ $w0 = COPY %2(s32)
+...
+---
+name: combine_vec_udiv_uniform
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$q0' }
+body: |
+ bb.1:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: combine_vec_udiv_uniform
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 25645
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16)
+ ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[COPY]], [[BUILD_VECTOR]]
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[COPY]], [[UMULH]]
+ ; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[SUB]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[UMULH1]], [[UMULH]]
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR2]](<8 x s16>)
+ ; CHECK-NEXT: $q0 = COPY [[LSHR]](<8 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:_(<8 x s16>) = COPY $q0
+ %2:_(s16) = G_CONSTANT i16 23
+ %1:_(<8 x s16>) = G_BUILD_VECTOR %2(s16), %2(s16), %2(s16), %2(s16), %2(s16), %2(s16), %2(s16), %2(s16)
+ %3:_(<8 x s16>) = G_UDIV %0, %1
+ $q0 = COPY %3(<8 x s16>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: combine_vec_udiv_nonuniform
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$q0' }
+body: |
+ bb.1:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: combine_vec_udiv_nonuniform
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 23
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 34
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -23
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 56
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 128
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 -256
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16), [[C4]](s16), [[C5]](s16), [[C6]](s16), [[C7]](s16)
+ ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+ ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 25645
+ ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+ ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 -3855
+ ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
+ ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 8195
+ ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s16) = G_CONSTANT i16 13
+ ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 3
+ ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 9363
+ ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s16) = G_CONSTANT i16 512
+ ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32767
+ ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+ ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32639
+ ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C15]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C9]](s16), [[C11]](s16), [[C13]](s16), [[C16]](s16), [[C17]](s16), [[C18]](s16), [[C20]](s16), [[C21]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C7]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C10]](s16), [[C12]](s16), [[C14]](s16), [[C8]](s16), [[C8]](s16), [[C19]](s16), [[C19]](s16), [[C8]](s16)
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[COPY]], [[BUILD_VECTOR1]](<8 x s16>)
+ ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[LSHR]], [[BUILD_VECTOR2]]
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[COPY]], [[UMULH]]
+ ; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[SUB]], [[BUILD_VECTOR3]]
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[UMULH1]], [[UMULH]]
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR4]](<8 x s16>)
+ ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+ ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR5]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<8 x s16>) = G_SELECT [[ICMP]](<8 x s1>), [[COPY]], [[LSHR1]]
+ ; CHECK-NEXT: $q0 = COPY [[SELECT]](<8 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:_(<8 x s16>) = COPY $q0
+ %2:_(s16) = G_CONSTANT i16 23
+ %3:_(s16) = G_CONSTANT i16 34
+ %4:_(s16) = G_CONSTANT i16 -23
+ %5:_(s16) = G_CONSTANT i16 56
+ %6:_(s16) = G_CONSTANT i16 128
+ %7:_(s16) = G_CONSTANT i16 -1
+ %8:_(s16) = G_CONSTANT i16 -256
+ %9:_(s16) = G_CONSTANT i16 -32768
+ %1:_(<8 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16), %5(s16), %6(s16), %7(s16), %8(s16), %9(s16)
+ %10:_(<8 x s16>) = G_UDIV %0, %1
+ $q0 = COPY %10(<8 x s16>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: combine_vec_udiv_nonuniform2
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$q0' }
+body: |
+ bb.1:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: combine_vec_udiv_nonuniform2
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -34
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 35
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 36
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -37
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 38
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -39
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 40
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 -41
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16), [[C4]](s16), [[C5]](s16), [[C6]](s16), [[C7]](s16)
+ ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+ ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 16393
+ ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+ ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 13
+ ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 -5617
+ ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
+ ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s16) = G_CONSTANT i16 -7281
+ ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32749
+ ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+ ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s16) = G_CONSTANT i16 -10347
+ ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s16) = G_CONSTANT i16 8197
+ ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s16) = G_CONSTANT i16 -13107
+ ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32747
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C10]](s16), [[C10]](s16), [[C10]](s16), [[C10]](s16), [[C10]](s16), [[C10]](s16), [[C10]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C9]](s16), [[C12]](s16), [[C14]](s16), [[C15]](s16), [[C17]](s16), [[C18]](s16), [[C19]](s16), [[C20]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C11]](s16), [[C13]](s16), [[C13]](s16), [[C16]](s16), [[C13]](s16), [[C11]](s16), [[C13]](s16), [[C16]](s16)
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[COPY]], [[BUILD_VECTOR1]](<8 x s16>)
+ ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[LSHR]], [[BUILD_VECTOR2]]
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[UMULH]], [[BUILD_VECTOR3]](<8 x s16>)
+ ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR4]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<8 x s16>) = G_SELECT [[ICMP]](<8 x s1>), [[COPY]], [[LSHR1]]
+ ; CHECK-NEXT: $q0 = COPY [[SELECT]](<8 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:_(<8 x s16>) = COPY $q0
+ %2:_(s16) = G_CONSTANT i16 -34
+ %3:_(s16) = G_CONSTANT i16 35
+ %4:_(s16) = G_CONSTANT i16 36
+ %5:_(s16) = G_CONSTANT i16 -37
+ %6:_(s16) = G_CONSTANT i16 38
+ %7:_(s16) = G_CONSTANT i16 -39
+ %8:_(s16) = G_CONSTANT i16 40
+ %9:_(s16) = G_CONSTANT i16 -41
+ %1:_(<8 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16), %5(s16), %6(s16), %7(s16), %8(s16), %9(s16)
+ %10:_(<8 x s16>) = G_UDIV %0, %1
+ $q0 = COPY %10(<8 x s16>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: combine_vec_udiv_nonuniform3
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$q0' }
+body: |
+ bb.1:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: combine_vec_udiv_nonuniform3
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 7
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 23
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 25
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 27
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 31
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 47
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 63
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 127
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16), [[C4]](s16), [[C5]](s16), [[C6]](s16), [[C7]](s16)
+ ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 9363
+ ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
+ ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+ ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 25645
+ ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+ ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 18351
+ ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s16) = G_CONSTANT i16 12137
+ ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 2115
+ ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 23705
+ ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
+ ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s16) = G_CONSTANT i16 1041
+ ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s16) = G_CONSTANT i16 517
+ ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C11]](s16), [[C13]](s16), [[C14]](s16), [[C15]](s16), [[C16]](s16), [[C18]](s16), [[C19]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C10]](s16), [[C12]](s16), [[C12]](s16), [[C12]](s16), [[C12]](s16), [[C17]](s16), [[C17]](s16), [[C20]](s16)
+ ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[COPY]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[COPY]], [[UMULH]]
+ ; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[SUB]], [[BUILD_VECTOR2]]
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[UMULH1]], [[UMULH]]
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR3]](<8 x s16>)
+ ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+ ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR4]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<8 x s16>) = G_SELECT [[ICMP]](<8 x s1>), [[COPY]], [[LSHR]]
+ ; CHECK-NEXT: $q0 = COPY [[SELECT]](<8 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:_(<8 x s16>) = COPY $q0
+ %2:_(s16) = G_CONSTANT i16 7
+ %3:_(s16) = G_CONSTANT i16 23
+ %4:_(s16) = G_CONSTANT i16 25
+ %5:_(s16) = G_CONSTANT i16 27
+ %6:_(s16) = G_CONSTANT i16 31
+ %7:_(s16) = G_CONSTANT i16 47
+ %8:_(s16) = G_CONSTANT i16 63
+ %9:_(s16) = G_CONSTANT i16 127
+ %1:_(<8 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16), %5(s16), %6(s16), %7(s16), %8(s16), %9(s16)
+ %10:_(<8 x s16>) = G_UDIV %0, %1
+ $q0 = COPY %10(<8 x s16>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: combine_vec_udiv_nonuniform4
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$q0' }
+body: |
+ bb.1:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: combine_vec_udiv_nonuniform4
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 -64
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 -85
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s8) = G_CONSTANT i8 7
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C3]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C4]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8)
+ ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<16 x s8>) = G_UMULH [[COPY]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<16 x s8>) = G_LSHR [[UMULH]], [[BUILD_VECTOR2]](<16 x s8>)
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<16 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<16 x s8>), [[BUILD_VECTOR3]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<16 x s8>) = G_SELECT [[ICMP]](<16 x s1>), [[COPY]], [[LSHR]]
+ ; CHECK-NEXT: $q0 = COPY [[SELECT]](<16 x s8>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:_(<16 x s8>) = COPY $q0
+ %2:_(s8) = G_CONSTANT i8 -64
+ %3:_(s8) = G_CONSTANT i8 1
+ %1:_(<16 x s8>) = G_BUILD_VECTOR %2(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8)
+ %4:_(<16 x s8>) = G_UDIV %0, %1
+ $q0 = COPY %4(<16 x s8>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: pr38477
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$q0' }
+body: |
+ bb.1:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: pr38477
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 119
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 73
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -111
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 -3
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 118
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 32
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 31
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16), [[C4]](s16), [[C5]](s16), [[C6]](s16), [[C7]](s16)
+ ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+ ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 4957
+ ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
+ ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
+ ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 -8079
+ ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 4103
+ ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s16) = G_CONSTANT i16 12
+ ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 16385
+ ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
+ ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s16) = G_CONSTANT i16 -29991
+ ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s16) = G_CONSTANT i16 2048
+ ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s16) = G_CONSTANT i16 2115
+ ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C9]](s16), [[C12]](s16), [[C13]](s16), [[C15]](s16), [[C17]](s16), [[C18]](s16), [[C19]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C10]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C10]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C11]](s16), [[C11]](s16), [[C14]](s16), [[C16]](s16), [[C11]](s16), [[C8]](s16), [[C20]](s16)
+ ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[COPY]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[COPY]], [[UMULH]]
+ ; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[SUB]], [[BUILD_VECTOR2]]
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[UMULH1]], [[UMULH]]
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR3]](<8 x s16>)
+ ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR4]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<8 x s16>) = G_SELECT [[ICMP]](<8 x s1>), [[COPY]], [[LSHR]]
+ ; CHECK-NEXT: $q0 = COPY [[SELECT]](<8 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %0:_(<8 x s16>) = COPY $q0
+ %2:_(s16) = G_CONSTANT i16 1
+ %3:_(s16) = G_CONSTANT i16 119
+ %4:_(s16) = G_CONSTANT i16 73
+ %5:_(s16) = G_CONSTANT i16 -111
+ %6:_(s16) = G_CONSTANT i16 -3
+ %7:_(s16) = G_CONSTANT i16 118
+ %8:_(s16) = G_CONSTANT i16 32
+ %9:_(s16) = G_CONSTANT i16 31
+ %1:_(<8 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16), %5(s16), %6(s16), %7(s16), %8(s16), %9(s16)
+ %10:_(<8 x s16>) = G_UDIV %0, %1
+ $q0 = COPY %10(<8 x s16>)
+ RET_ReallyLR implicit $q0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
index bd1bc4d0a25c9..1a3f54695bc49 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
@@ -222,117 +222,21 @@ define i32 @v_udiv_i32_pow2k_denom(i32 %num) {
; CHECK-LABEL: v_udiv_i32_pow2k_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_movk_i32 s6, 0x1000
-; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, 0x45800000
-; CHECK-NEXT: v_mov_b32_e32 v2, 0xfffff000
-; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
-; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1
-; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
-; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
-; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
-; CHECK-NEXT: v_lshlrev_b32_e32 v2, 12, v1
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1
-; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; CHECK-NEXT: v_subrev_i32_e64 v2, s[4:5], s6, v0
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v1
-; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; CHECK-NEXT: v_mov_b32_e32 v1, 0x100000
+; CHECK-NEXT: v_mul_hi_u32 v0, v0, v1
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = udiv i32 %num, 4096
ret i32 %result
}
define <2 x i32> @v_udiv_v2i32_pow2k_denom(<2 x i32> %num) {
-; GISEL-LABEL: v_udiv_v2i32_pow2k_denom:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT: s_movk_i32 s4, 0x1000
-; GISEL-NEXT: v_mov_b32_e32 v2, 0x1000
-; GISEL-NEXT: v_mov_b32_e32 v3, 0xfffff000
-; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s4
-; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v2
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
-; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_lo_u32 v6, v3, v4
-; GISEL-NEXT: v_mul_lo_u32 v3, v3, v5
-; GISEL-NEXT: v_mul_hi_u32 v6, v4, v6
-; GISEL-NEXT: v_mul_hi_u32 v3, v5, v3
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3
-; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
-; GISEL-NEXT: v_mul_hi_u32 v3, v1, v3
-; GISEL-NEXT: v_lshlrev_b32_e32 v5, 12, v4
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4
-; GISEL-NEXT: v_lshlrev_b32_e32 v7, 12, v3
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v3
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
-; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
-; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; GISEL-NEXT: v_subrev_i32_e64 v5, s[4:5], s4, v0
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2
-; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v8, s[4:5]
-; GISEL-NEXT: v_sub_i32_e64 v6, s[6:7], v1, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v3
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
-; GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_udiv_v2i32_pow2k_denom:
-; CGP: ; %bb.0:
-; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT: s_movk_i32 s8, 0x1000
-; CGP-NEXT: v_rcp_iflag_f32_e32 v2, 0x45800000
-; CGP-NEXT: s_movk_i32 s4, 0xf000
-; CGP-NEXT: v_mov_b32_e32 v3, 0xfffff000
-; CGP-NEXT: v_mov_b32_e32 v4, 0x1000
-; CGP-NEXT: v_rcp_iflag_f32_e32 v5, 0x45800000
-; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
-; CGP-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
-; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
-; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
-; CGP-NEXT: v_mul_lo_u32 v6, s4, v2
-; CGP-NEXT: v_mul_lo_u32 v3, v3, v5
-; CGP-NEXT: v_mul_hi_u32 v6, v2, v6
-; CGP-NEXT: v_mul_hi_u32 v3, v5, v3
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
-; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3
-; CGP-NEXT: v_mul_hi_u32 v2, v0, v2
-; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
-; CGP-NEXT: v_lshlrev_b32_e32 v5, 12, v2
-; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v2
-; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v3
-; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v3
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
-; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; CGP-NEXT: v_subrev_i32_e64 v5, s[4:5], s8, v0
-; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v1
-; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v8, s[4:5]
-; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v1, v4
-; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
-; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
-; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v3
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
-; CGP-NEXT: v_cndmask_b32_e32 v0, v2, v5, vcc
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
-; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
-; CGP-NEXT: s_setpc_b64 s[30:31]
+; CHECK-LABEL: v_udiv_v2i32_pow2k_denom:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_mov_b32 s4, 0x100000
+; CHECK-NEXT: v_mul_hi_u32 v0, v0, s4
+; CHECK-NEXT: v_mul_hi_u32 v1, v1, s4
+; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = udiv <2 x i32> %num, <i32 4096, i32 4096>
ret <2 x i32> %result
}
@@ -341,25 +245,12 @@ define i32 @v_udiv_i32_oddk_denom(i32 %num) {
; CHECK-LABEL: v_udiv_i32_oddk_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb
-; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, 0x4996c7d8
-; CHECK-NEXT: v_mov_b32_e32 v2, 0xffed2705
-; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
-; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
-; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1
-; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
-; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT: v_mov_b32_e32 v1, 0xb2a50881
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
-; CHECK-NEXT: v_mul_lo_u32 v2, v1, s6
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1
-; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; CHECK-NEXT: v_subrev_i32_e64 v2, s[4:5], s6, v0
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v1
-; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v0
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT: v_lshrrev_b32_e32 v0, 20, v0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = udiv i32 %num, 1235195
ret i32 %result
@@ -369,87 +260,34 @@ define <2 x i32> @v_udiv_v2i32_oddk_denom(<2 x i32> %num) {
; GISEL-LABEL: v_udiv_v2i32_oddk_denom:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT: s_mov_b32 s8, 0x12d8fb
-; GISEL-NEXT: v_mov_b32_e32 v2, 0x12d8fb
-; GISEL-NEXT: v_mov_b32_e32 v3, 0xffed2705
-; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s8
-; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v2
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
-; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_lo_u32 v6, v3, v4
-; GISEL-NEXT: v_mul_lo_u32 v3, v3, v5
-; GISEL-NEXT: v_mul_hi_u32 v6, v4, v6
-; GISEL-NEXT: v_mul_hi_u32 v3, v5, v3
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3
-; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
-; GISEL-NEXT: v_mul_hi_u32 v3, v1, v3
-; GISEL-NEXT: v_mul_lo_u32 v5, v4, s8
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4
-; GISEL-NEXT: v_mul_lo_u32 v7, v3, v2
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v3
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
-; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
-; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; GISEL-NEXT: v_subrev_i32_e64 v5, s[4:5], s8, v0
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2
-; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v8, s[4:5]
-; GISEL-NEXT: v_sub_i32_e64 v6, s[6:7], v1, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v3
-; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
+; GISEL-NEXT: s_mov_b32 s4, 0xb2a50881
+; GISEL-NEXT: s_brev_b32 s5, 1
+; GISEL-NEXT: v_mul_hi_u32 v2, v0, s4
+; GISEL-NEXT: v_mul_hi_u32 v3, v1, s4
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
+; GISEL-NEXT: v_mul_hi_u32 v0, v0, s5
+; GISEL-NEXT: v_mul_hi_u32 v1, v1, s5
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
+; GISEL-NEXT: v_lshrrev_b32_e32 v0, 20, v0
+; GISEL-NEXT: v_lshrrev_b32_e32 v1, 20, v1
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_udiv_v2i32_oddk_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT: s_mov_b32 s8, 0x12d8fb
-; CGP-NEXT: v_rcp_iflag_f32_e32 v2, 0x4996c7d8
-; CGP-NEXT: s_mov_b32 s4, 0xffed2705
-; CGP-NEXT: v_mov_b32_e32 v3, 0x12d8fb
-; CGP-NEXT: v_rcp_iflag_f32_e32 v4, 0x4996c7d8
-; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
-; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
-; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
-; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
-; CGP-NEXT: v_mul_lo_u32 v5, s4, v2
-; CGP-NEXT: v_mul_lo_u32 v6, s4, v4
-; CGP-NEXT: v_mul_hi_u32 v5, v2, v5
-; CGP-NEXT: v_mul_hi_u32 v6, v4, v6
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6
-; CGP-NEXT: v_mul_hi_u32 v2, v0, v2
-; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
-; CGP-NEXT: v_mul_lo_u32 v5, v2, s8
-; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v2
-; CGP-NEXT: v_mul_lo_u32 v7, v4, s8
-; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
-; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
-; CGP-NEXT: v_subrev_i32_e64 v5, s[4:5], s8, v0
-; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3
-; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[4:5]
-; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v1, v3
-; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
-; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
-; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v4
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
-; CGP-NEXT: v_cndmask_b32_e32 v0, v2, v5, vcc
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
-; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v6, vcc
+; CGP-NEXT: s_mov_b32 s4, 0xb2a50881
+; CGP-NEXT: v_mul_hi_u32 v2, v0, s4
+; CGP-NEXT: v_mul_hi_u32 v3, v1, s4
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
+; CGP-NEXT: v_lshrrev_b32_e32 v0, 1, v0
+; CGP-NEXT: v_lshrrev_b32_e32 v1, 1, v1
+; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v3
+; CGP-NEXT: v_lshrrev_b32_e32 v0, 20, v0
+; CGP-NEXT: v_lshrrev_b32_e32 v1, 20, v1
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = udiv <2 x i32> %num, <i32 1235195, i32 1235195>
ret <2 x i32> %result
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
index 2c95c717e34b6..dcf7cac58b982 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -969,659 +969,78 @@ define i64 @v_udiv_i64_pow2k_denom(i64 %num) {
; CHECK-LABEL: v_udiv_i64_pow2k_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000
-; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0
-; CHECK-NEXT: s_movk_i32 s6, 0xf000
-; CHECK-NEXT: s_movk_i32 s7, 0x1000
-; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000
-; CHECK-NEXT: s_bfe_i32 s5, -1, 0x10000
-; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
-; CHECK-NEXT: v_mov_b32_e32 v3, s4
-; CHECK-NEXT: v_mov_b32_e32 v4, s5
-; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
-; CHECK-NEXT: v_mul_f32_e32 v5, 0x2f800000, v2
-; CHECK-NEXT: v_trunc_f32_e32 v5, v5
-; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v5
-; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5
-; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_lo_u32 v6, s6, v5
-; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2
-; CHECK-NEXT: v_mul_lo_u32 v8, -1, v2
-; CHECK-NEXT: v_mul_hi_u32 v9, s6, v2
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT: v_mul_lo_u32 v8, v5, v7
-; CHECK-NEXT: v_mul_hi_u32 v10, v2, v7
-; CHECK-NEXT: v_mul_hi_u32 v7, v5, v7
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9
-; CHECK-NEXT: v_mul_lo_u32 v9, v2, v6
-; CHECK-NEXT: v_mul_lo_u32 v11, v5, v6
-; CHECK-NEXT: v_mul_hi_u32 v12, v2, v6
-; CHECK-NEXT: v_mul_hi_u32 v6, v5, v6
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v11, v7
-; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v12
-; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v10
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7
-; CHECK-NEXT: v_addc_u32_e64 v7, s[4:5], v5, v6, vcc
-; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6
-; CHECK-NEXT: v_mul_lo_u32 v6, s6, v2
-; CHECK-NEXT: v_mul_lo_u32 v8, -1, v2
-; CHECK-NEXT: v_mul_hi_u32 v9, s6, v2
-; CHECK-NEXT: v_mul_lo_u32 v10, s6, v7
-; CHECK-NEXT: v_mul_lo_u32 v11, v7, v6
-; CHECK-NEXT: v_mul_hi_u32 v12, v2, v6
-; CHECK-NEXT: v_mul_hi_u32 v6, v7, v6
-; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10
-; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v9
-; CHECK-NEXT: v_mul_lo_u32 v9, v2, v8
-; CHECK-NEXT: v_mul_lo_u32 v10, v7, v8
-; CHECK-NEXT: v_mul_hi_u32 v13, v2, v8
-; CHECK-NEXT: v_mul_hi_u32 v7, v7, v8
-; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v11, v9
-; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5]
-; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v10, v6
-; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5]
-; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v12
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5]
-; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v13
-; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8
-; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v10, v11
-; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5]
-; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8
-; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8
-; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
-; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
-; CHECK-NEXT: v_mul_lo_u32 v6, v1, v2
-; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2
-; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
-; CHECK-NEXT: v_mul_lo_u32 v8, v0, v5
-; CHECK-NEXT: v_mul_lo_u32 v9, v1, v5
-; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5
-; CHECK-NEXT: v_mul_hi_u32 v5, v1, v5
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2
-; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; CHECK-NEXT: s_mov_b32 s4, 0x100000
+; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v0
+; CHECK-NEXT: v_mul_hi_u32 v3, v0, 0
+; CHECK-NEXT: v_lshlrev_b32_e32 v4, 20, v1
+; CHECK-NEXT: v_mul_hi_u32 v5, v1, 0
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0, v2
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v10
-; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
-; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
-; CHECK-NEXT: v_mul_lo_u32 v7, s7, v2
-; CHECK-NEXT: v_mul_lo_u32 v8, 0, v2
-; CHECK-NEXT: v_mul_hi_u32 v9, s7, v2
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
-; CHECK-NEXT: v_mul_lo_u32 v6, s7, v5
-; CHECK-NEXT: v_add_i32_e32 v10, vcc, 1, v2
-; CHECK-NEXT: v_addc_u32_e32 v11, vcc, 0, v5, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v10
-; CHECK-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9
-; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
-; CHECK-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v6, vcc
-; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6
-; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0
-; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7
-; CHECK-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[4:5]
-; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0
-; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s7, v0
-; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v10, v8, vcc
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v11, v12, vcc
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; CHECK-NEXT: v_mul_hi_u32 v0, v0, s4
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v4, v0
+; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT: v_mul_hi_u32 v1, v1, s4
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = udiv i64 %num, 4096
ret i64 %result
}
define <2 x i64> @v_udiv_v2i64_pow2k_denom(<2 x i64> %num) {
-; GISEL-LABEL: v_udiv_v2i64_pow2k_denom:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT: s_movk_i32 s12, 0x1000
-; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s12
-; GISEL-NEXT: s_sub_u32 s8, 0, s12
-; GISEL-NEXT: s_cselect_b32 s4, 1, 0
-; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
-; GISEL-NEXT: v_mov_b32_e32 v6, v4
-; GISEL-NEXT: s_and_b32 s4, s4, 1
-; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
-; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v6
-; GISEL-NEXT: s_cmp_lg_u32 s4, 0
-; GISEL-NEXT: s_subb_u32 s9, 0, 0
-; GISEL-NEXT: s_bfe_i32 s10, -1, 0x10000
-; GISEL-NEXT: s_bfe_i32 s11, -1, 0x10000
-; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4
-; GISEL-NEXT: s_sub_u32 s13, 0, s12
-; GISEL-NEXT: s_cselect_b32 s4, 1, 0
-; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5
-; GISEL-NEXT: v_trunc_f32_e32 v6, v6
-; GISEL-NEXT: s_and_b32 s4, s4, 1
-; GISEL-NEXT: v_trunc_f32_e32 v7, v7
-; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6
-; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
-; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7
-; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: s_cmp_lg_u32 s4, 0
-; GISEL-NEXT: s_subb_u32 s6, 0, 0
-; GISEL-NEXT: v_mul_lo_u32 v8, s13, v6
-; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_lo_u32 v9, s8, v7
-; GISEL-NEXT: v_mul_lo_u32 v10, s13, v4
-; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4
-; GISEL-NEXT: v_mul_hi_u32 v12, s13, v4
-; GISEL-NEXT: v_mul_lo_u32 v13, s8, v5
-; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5
-; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8
-; GISEL-NEXT: v_mul_lo_u32 v11, v6, v10
-; GISEL-NEXT: v_mul_hi_u32 v16, v4, v10
-; GISEL-NEXT: v_mul_hi_u32 v10, v6, v10
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9
-; GISEL-NEXT: v_mul_lo_u32 v14, v7, v13
-; GISEL-NEXT: v_mul_hi_u32 v17, v5, v13
-; GISEL-NEXT: v_mul_hi_u32 v13, v7, v13
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15
-; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8
-; GISEL-NEXT: v_mul_lo_u32 v15, v6, v8
-; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8
-; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8
-; GISEL-NEXT: v_mul_lo_u32 v19, v5, v9
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v19
-; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17
-; GISEL-NEXT: v_mul_lo_u32 v14, v7, v9
-; GISEL-NEXT: v_mul_hi_u32 v17, v5, v9
-; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9
-; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18
-; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v16
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v18
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v15
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
-; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
-; GISEL-NEXT: v_mul_lo_u32 v11, s13, v4
-; GISEL-NEXT: v_mul_lo_u32 v12, s6, v4
-; GISEL-NEXT: v_mul_hi_u32 v14, s13, v4
-; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13
-; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v15, s8, v5
-; GISEL-NEXT: v_mul_lo_u32 v16, s9, v5
-; GISEL-NEXT: v_mul_hi_u32 v17, s8, v5
-; GISEL-NEXT: v_mul_lo_u32 v18, s8, v13
-; GISEL-NEXT: v_mul_lo_u32 v19, v13, v15
-; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18
-; GISEL-NEXT: v_mul_hi_u32 v18, v5, v15
-; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v17
-; GISEL-NEXT: v_mul_lo_u32 v17, v5, v16
-; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17
-; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7]
-; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], v17, v18
-; GISEL-NEXT: v_mul_lo_u32 v17, s13, v10
-; GISEL-NEXT: v_mul_lo_u32 v18, v10, v11
-; GISEL-NEXT: v_add_i32_e64 v12, s[8:9], v12, v17
-; GISEL-NEXT: v_mul_hi_u32 v17, v4, v11
-; GISEL-NEXT: v_add_i32_e64 v12, s[8:9], v12, v14
-; GISEL-NEXT: v_mul_lo_u32 v14, v4, v12
-; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v18, v14
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9]
-; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v14, v17
-; GISEL-NEXT: v_mov_b32_e32 v14, s10
-; GISEL-NEXT: v_mov_b32_e32 v17, s11
-; GISEL-NEXT: s_bfe_i32 s13, -1, 0x10000
-; GISEL-NEXT: s_bfe_i32 s14, -1, 0x10000
-; GISEL-NEXT: v_add_i32_e64 v6, s[10:11], v6, v8
-; GISEL-NEXT: v_mov_b32_e32 v8, s13
-; GISEL-NEXT: v_add_i32_e64 v7, s[10:11], v7, v9
-; GISEL-NEXT: v_mul_hi_u32 v9, v10, v11
-; GISEL-NEXT: v_mul_hi_u32 v11, v13, v15
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9]
-; GISEL-NEXT: v_add_i32_e64 v15, s[8:9], v18, v15
-; GISEL-NEXT: v_mul_lo_u32 v18, v10, v12
-; GISEL-NEXT: v_mul_hi_u32 v10, v10, v12
-; GISEL-NEXT: v_mul_hi_u32 v12, v4, v12
-; GISEL-NEXT: v_add_i32_e64 v9, s[8:9], v18, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9]
-; GISEL-NEXT: v_add_i32_e64 v9, s[8:9], v9, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9]
-; GISEL-NEXT: v_add_i32_e64 v12, s[8:9], v18, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v19, v18
-; GISEL-NEXT: v_mul_lo_u32 v19, v13, v16
-; GISEL-NEXT: v_mul_hi_u32 v13, v13, v16
-; GISEL-NEXT: v_mul_hi_u32 v16, v5, v16
-; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v19, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7]
-; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16
-; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7]
-; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v19, v16
-; GISEL-NEXT: v_mov_b32_e32 v19, s14
-; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7]
-; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v18
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v12, v15
-; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v18
-; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12
-; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v13, v15
-; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc
-; GISEL-NEXT: v_addc_u32_e64 v7, vcc, v7, v12, s[4:5]
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc
-; GISEL-NEXT: v_mul_lo_u32 v9, v3, v4
-; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4
-; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v11
-; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc
-; GISEL-NEXT: v_mul_lo_u32 v11, v1, v5
-; GISEL-NEXT: v_mul_hi_u32 v12, v0, v5
-; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
-; GISEL-NEXT: v_mul_lo_u32 v13, v2, v6
-; GISEL-NEXT: v_mul_lo_u32 v15, v3, v6
-; GISEL-NEXT: v_mul_hi_u32 v16, v2, v6
-; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6
-; GISEL-NEXT: v_mul_lo_u32 v18, v0, v7
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
-; GISEL-NEXT: v_mul_lo_u32 v11, v1, v7
-; GISEL-NEXT: v_mul_hi_u32 v12, v0, v7
-; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
-; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v15, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v11, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v16
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v16
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; GISEL-NEXT: v_mul_lo_u32 v10, s12, v4
-; GISEL-NEXT: v_mul_lo_u32 v13, 0, v4
-; GISEL-NEXT: v_mul_hi_u32 v15, s12, v4
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
-; GISEL-NEXT: v_mul_lo_u32 v12, s12, v5
-; GISEL-NEXT: v_mul_lo_u32 v16, 0, v5
-; GISEL-NEXT: v_mul_hi_u32 v18, s12, v5
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11
-; GISEL-NEXT: v_mul_lo_u32 v9, s12, v6
-; GISEL-NEXT: v_mul_lo_u32 v11, s12, v7
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v4
-; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v6, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, 1, v5
-; GISEL-NEXT: v_addc_u32_e32 v18, vcc, 0, v7, vcc
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10
-; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v3, v9, vcc
-; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v9
-; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s12, v2
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v12
-; GISEL-NEXT: v_subb_u32_e64 v12, s[6:7], v1, v11, s[4:5]
-; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v11
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v10
-; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], 1, v13
-; GISEL-NEXT: v_addc_u32_e64 v11, s[8:9], 0, v16, s[8:9]
-; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[6:7]
-; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s12, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v9, v14, v9, s[6:7]
-; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], 1, v15
-; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], 0, v18, s[6:7]
-; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
-; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s12, v2
-; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s12, v2
-; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
-; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s12, v0
-; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s12, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v19, v2, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v17, v0, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v13, v10, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v15, v12, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v16, v11, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v1, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v18, v14, s[4:5]
-; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
-; GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_udiv_v2i64_pow2k_denom:
-; CGP: ; %bb.0:
-; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x1000
-; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
-; CGP-NEXT: s_movk_i32 s8, 0xf000
-; CGP-NEXT: s_movk_i32 s12, 0x1000
-; CGP-NEXT: s_bfe_i32 s10, -1, 0x10000
-; CGP-NEXT: s_bfe_i32 s11, -1, 0x10000
-; CGP-NEXT: s_bfe_i32 s13, -1, 0x10000
-; CGP-NEXT: s_bfe_i32 s14, -1, 0x10000
-; CGP-NEXT: v_mov_b32_e32 v6, v4
-; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
-; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5
-; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6
-; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4
-; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5
-; CGP-NEXT: v_trunc_f32_e32 v6, v6
-; CGP-NEXT: v_trunc_f32_e32 v7, v7
-; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6
-; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
-; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7
-; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
-; CGP-NEXT: v_mul_lo_u32 v8, s8, v6
-; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
-; CGP-NEXT: v_mul_lo_u32 v9, s8, v7
-; CGP-NEXT: v_mul_lo_u32 v10, s8, v4
-; CGP-NEXT: v_mul_lo_u32 v11, -1, v4
-; CGP-NEXT: v_mul_hi_u32 v12, s8, v4
-; CGP-NEXT: v_mul_lo_u32 v13, s8, v5
-; CGP-NEXT: v_mul_lo_u32 v14, -1, v5
-; CGP-NEXT: v_mul_hi_u32 v15, s8, v5
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8
-; CGP-NEXT: v_mul_lo_u32 v11, v6, v10
-; CGP-NEXT: v_mul_hi_u32 v16, v4, v10
-; CGP-NEXT: v_mul_hi_u32 v10, v6, v10
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v14, v9
-; CGP-NEXT: v_mul_lo_u32 v14, v7, v13
-; CGP-NEXT: v_mul_hi_u32 v17, v5, v13
-; CGP-NEXT: v_mul_hi_u32 v13, v7, v13
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15
-; CGP-NEXT: v_mul_lo_u32 v12, v4, v8
-; CGP-NEXT: v_mul_lo_u32 v15, v6, v8
-; CGP-NEXT: v_mul_hi_u32 v18, v4, v8
-; CGP-NEXT: v_mul_hi_u32 v8, v6, v8
-; CGP-NEXT: v_mul_lo_u32 v19, v5, v9
-; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19
-; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17
-; CGP-NEXT: v_mul_lo_u32 v14, v7, v9
-; CGP-NEXT: v_mul_hi_u32 v17, v5, v9
-; CGP-NEXT: v_mul_hi_u32 v9, v7, v9
-; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13
-; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18
-; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17
-; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v16
-; CGP-NEXT: v_add_i32_e32 v15, vcc, v19, v18
-; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v15
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10
-; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
-; CGP-NEXT: v_mul_lo_u32 v11, s8, v4
-; CGP-NEXT: v_mul_lo_u32 v12, -1, v4
-; CGP-NEXT: v_mul_hi_u32 v14, s8, v4
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13
-; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
-; CGP-NEXT: v_mul_lo_u32 v15, s8, v5
-; CGP-NEXT: v_mul_lo_u32 v16, -1, v5
-; CGP-NEXT: v_mul_hi_u32 v17, s8, v5
-; CGP-NEXT: v_mul_lo_u32 v18, s8, v13
-; CGP-NEXT: v_mul_lo_u32 v19, v13, v15
-; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18
-; CGP-NEXT: v_mul_hi_u32 v18, v5, v15
-; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v17
-; CGP-NEXT: v_mul_lo_u32 v17, v5, v16
-; CGP-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17
-; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7]
-; CGP-NEXT: v_add_i32_e64 v17, s[6:7], v17, v18
-; CGP-NEXT: v_mul_lo_u32 v17, s8, v10
-; CGP-NEXT: v_mul_lo_u32 v18, v10, v11
-; CGP-NEXT: v_add_i32_e64 v12, s[8:9], v12, v17
-; CGP-NEXT: v_mul_hi_u32 v17, v4, v11
-; CGP-NEXT: v_add_i32_e64 v12, s[8:9], v12, v14
-; CGP-NEXT: v_mul_lo_u32 v14, v4, v12
-; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v18, v14
-; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9]
-; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v14, v17
-; CGP-NEXT: v_mov_b32_e32 v14, s10
-; CGP-NEXT: v_mov_b32_e32 v17, s11
-; CGP-NEXT: v_add_i32_e64 v6, s[10:11], v6, v8
-; CGP-NEXT: v_mov_b32_e32 v8, s13
-; CGP-NEXT: v_add_i32_e64 v7, s[10:11], v7, v9
-; CGP-NEXT: v_mul_hi_u32 v9, v10, v11
-; CGP-NEXT: v_mul_hi_u32 v11, v13, v15
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9]
-; CGP-NEXT: v_add_i32_e64 v15, s[8:9], v18, v15
-; CGP-NEXT: v_mul_lo_u32 v18, v10, v12
-; CGP-NEXT: v_mul_hi_u32 v10, v10, v12
-; CGP-NEXT: v_mul_hi_u32 v12, v4, v12
-; CGP-NEXT: v_add_i32_e64 v9, s[8:9], v18, v9
-; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9]
-; CGP-NEXT: v_add_i32_e64 v9, s[8:9], v9, v12
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9]
-; CGP-NEXT: v_add_i32_e64 v12, s[8:9], v18, v12
-; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; CGP-NEXT: v_add_i32_e64 v18, s[6:7], v19, v18
-; CGP-NEXT: v_mul_lo_u32 v19, v13, v16
-; CGP-NEXT: v_mul_hi_u32 v13, v13, v16
-; CGP-NEXT: v_mul_hi_u32 v16, v5, v16
-; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v19, v11
-; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7]
-; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16
-; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7]
-; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v19, v16
-; CGP-NEXT: v_mov_b32_e32 v19, s14
-; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7]
-; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v18
-; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v12, v15
-; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v16, v18
-; CGP-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12
-; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v13, v15
-; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc
-; CGP-NEXT: v_addc_u32_e64 v7, vcc, v7, v12, s[4:5]
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc
-; CGP-NEXT: v_mul_lo_u32 v9, v3, v4
-; CGP-NEXT: v_mul_hi_u32 v10, v2, v4
-; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11
-; CGP-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc
-; CGP-NEXT: v_mul_lo_u32 v11, v1, v5
-; CGP-NEXT: v_mul_hi_u32 v12, v0, v5
-; CGP-NEXT: v_mul_hi_u32 v5, v1, v5
-; CGP-NEXT: v_mul_lo_u32 v13, v2, v6
-; CGP-NEXT: v_mul_lo_u32 v15, v3, v6
-; CGP-NEXT: v_mul_hi_u32 v16, v2, v6
-; CGP-NEXT: v_mul_hi_u32 v6, v3, v6
-; CGP-NEXT: v_mul_lo_u32 v18, v0, v7
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v18
-; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT: v_mul_lo_u32 v11, v1, v7
-; CGP-NEXT: v_mul_hi_u32 v12, v0, v7
-; CGP-NEXT: v_mul_hi_u32 v7, v1, v7
-; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13
-; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v15, v4
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v11, v5
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v9, v10
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v16
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v15, v10
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v18, v16
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT: v_mul_lo_u32 v10, s12, v4
-; CGP-NEXT: v_mul_lo_u32 v13, 0, v4
-; CGP-NEXT: v_mul_hi_u32 v15, s12, v4
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT: v_mul_lo_u32 v12, s12, v5
-; CGP-NEXT: v_mul_lo_u32 v16, 0, v5
-; CGP-NEXT: v_mul_hi_u32 v18, s12, v5
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v9
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11
-; CGP-NEXT: v_mul_lo_u32 v9, s12, v6
-; CGP-NEXT: v_mul_lo_u32 v11, s12, v7
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v16, v11
-; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v4
-; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v6, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v18
-; CGP-NEXT: v_add_i32_e32 v15, vcc, 1, v5
-; CGP-NEXT: v_addc_u32_e32 v18, vcc, 0, v7, vcc
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10
-; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v3, v9, vcc
-; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v9
-; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s12, v2
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v12
-; CGP-NEXT: v_subb_u32_e64 v12, s[6:7], v1, v11, s[4:5]
-; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v11
-; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v10
-; CGP-NEXT: v_add_i32_e64 v10, s[8:9], 1, v13
-; CGP-NEXT: v_addc_u32_e64 v11, s[8:9], 0, v16, s[8:9]
-; CGP-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[6:7]
-; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], s12, v0
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7]
-; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v12
-; CGP-NEXT: v_cndmask_b32_e64 v9, v14, v9, s[6:7]
-; CGP-NEXT: v_add_i32_e64 v12, s[6:7], 1, v15
-; CGP-NEXT: v_addc_u32_e64 v14, s[6:7], 0, v18, s[6:7]
-; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; CGP-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
-; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s12, v2
-; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s12, v2
-; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
-; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s12, v0
-; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s12, v0
-; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; CGP-NEXT: v_cndmask_b32_e32 v2, v19, v2, vcc
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CGP-NEXT: v_cndmask_b32_e32 v0, v17, v0, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
-; CGP-NEXT: v_cndmask_b32_e32 v1, v13, v10, vcc
-; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0
-; CGP-NEXT: v_cndmask_b32_e64 v0, v15, v12, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v3, v16, v11, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
-; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v1, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v1, v18, v14, s[4:5]
-; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v9
-; CGP-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
-; CGP-NEXT: s_setpc_b64 s[30:31]
+; CHECK-LABEL: v_udiv_v2i64_pow2k_denom:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_mov_b32 s4, 0x100000
+; CHECK-NEXT: v_lshlrev_b32_e32 v4, 20, v0
+; CHECK-NEXT: v_mul_hi_u32 v5, v0, 0
+; CHECK-NEXT: v_lshlrev_b32_e32 v6, 20, v1
+; CHECK-NEXT: v_mul_hi_u32 v7, v1, 0
+; CHECK-NEXT: v_lshlrev_b32_e32 v8, 20, v2
+; CHECK-NEXT: v_mul_hi_u32 v9, v2, 0
+; CHECK-NEXT: v_lshlrev_b32_e32 v10, 20, v3
+; CHECK-NEXT: v_mul_hi_u32 v11, v3, 0
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, 0, v4
+; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT: v_mul_hi_u32 v0, v0, s4
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT: v_mul_hi_u32 v1, v1, s4
+; CHECK-NEXT: v_add_i32_e32 v8, vcc, 0, v8
+; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT: v_mul_hi_u32 v2, v2, s4
+; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11
+; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT: v_mul_hi_u32 v3, v3, s4
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v6, v0
+; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v9
+; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v10, v2
+; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v12, v4
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v13, v6
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v11, v8
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
+; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v6
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v4
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5
+; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = udiv <2 x i64> %num, <i64 4096, i64 4096>
ret <2 x i64> %result
}
@@ -1630,659 +1049,86 @@ define i64 @v_udiv_i64_oddk_denom(i64 %num) {
; CHECK-LABEL: v_udiv_i64_oddk_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb
-; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0
-; CHECK-NEXT: s_mov_b32 s6, 0xffed2705
-; CHECK-NEXT: s_mov_b32 s7, 0x12d8fb
-; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000
-; CHECK-NEXT: s_bfe_i32 s5, -1, 0x10000
-; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
-; CHECK-NEXT: v_mov_b32_e32 v3, s4
-; CHECK-NEXT: v_mov_b32_e32 v4, s5
-; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
-; CHECK-NEXT: v_mul_f32_e32 v5, 0x2f800000, v2
-; CHECK-NEXT: v_trunc_f32_e32 v5, v5
-; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v5
-; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5
-; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_lo_u32 v6, s6, v5
-; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2
-; CHECK-NEXT: v_mul_lo_u32 v8, -1, v2
-; CHECK-NEXT: v_mul_hi_u32 v9, s6, v2
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT: v_mul_lo_u32 v8, v5, v7
-; CHECK-NEXT: v_mul_hi_u32 v10, v2, v7
-; CHECK-NEXT: v_mul_hi_u32 v7, v5, v7
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9
-; CHECK-NEXT: v_mul_lo_u32 v9, v2, v6
-; CHECK-NEXT: v_mul_lo_u32 v11, v5, v6
-; CHECK-NEXT: v_mul_hi_u32 v12, v2, v6
-; CHECK-NEXT: v_mul_hi_u32 v6, v5, v6
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v11, v7
-; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v12
-; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v10
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7
-; CHECK-NEXT: v_addc_u32_e64 v7, s[4:5], v5, v6, vcc
-; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6
-; CHECK-NEXT: v_mul_lo_u32 v6, s6, v2
-; CHECK-NEXT: v_mul_lo_u32 v8, -1, v2
-; CHECK-NEXT: v_mul_hi_u32 v9, s6, v2
-; CHECK-NEXT: v_mul_lo_u32 v10, s6, v7
-; CHECK-NEXT: v_mul_lo_u32 v11, v7, v6
-; CHECK-NEXT: v_mul_hi_u32 v12, v2, v6
-; CHECK-NEXT: v_mul_hi_u32 v6, v7, v6
-; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10
-; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v9
-; CHECK-NEXT: v_mul_lo_u32 v9, v2, v8
-; CHECK-NEXT: v_mul_lo_u32 v10, v7, v8
-; CHECK-NEXT: v_mul_hi_u32 v13, v2, v8
-; CHECK-NEXT: v_mul_hi_u32 v7, v7, v8
-; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v11, v9
-; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5]
-; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v10, v6
-; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5]
-; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v12
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5]
-; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v13
-; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8
-; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v10, v11
-; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5]
-; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8
-; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8
-; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
-; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
-; CHECK-NEXT: v_mul_lo_u32 v6, v1, v2
-; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2
-; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
-; CHECK-NEXT: v_mul_lo_u32 v8, v0, v5
-; CHECK-NEXT: v_mul_lo_u32 v9, v1, v5
-; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5
-; CHECK-NEXT: v_mul_hi_u32 v5, v1, v5
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2
-; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
-; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v10
-; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
-; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
-; CHECK-NEXT: v_mul_lo_u32 v7, s7, v2
-; CHECK-NEXT: v_mul_lo_u32 v8, 0, v2
-; CHECK-NEXT: v_mul_hi_u32 v9, s7, v2
+; CHECK-NEXT: s_mov_b32 s4, 0x1fb03c31
+; CHECK-NEXT: s_mov_b32 s5, 0xd9528440
+; CHECK-NEXT: v_mul_lo_u32 v2, v1, s4
+; CHECK-NEXT: v_mul_lo_u32 v3, v0, s5
+; CHECK-NEXT: v_mul_hi_u32 v4, v0, s4
+; CHECK-NEXT: v_mul_lo_u32 v5, v1, s5
+; CHECK-NEXT: v_mul_hi_u32 v6, v1, s4
+; CHECK-NEXT: v_mul_hi_u32 v0, v0, s5
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
-; CHECK-NEXT: v_mul_lo_u32 v6, s7, v5
-; CHECK-NEXT: v_add_i32_e32 v10, vcc, 1, v2
-; CHECK-NEXT: v_addc_u32_e32 v11, vcc, 0, v5, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v10
-; CHECK-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9
-; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
-; CHECK-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v6, vcc
-; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6
-; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0
-; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7
-; CHECK-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[4:5]
-; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0
-; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s7, v0
-; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v10, v8, vcc
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v11, v12, vcc
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4
+; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0
+; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v4
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT: v_mul_hi_u32 v1, v1, s5
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT: v_lshr_b64 v[0:1], v[0:1], 20
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = udiv i64 %num, 1235195
ret i64 %result
}
define <2 x i64> @v_udiv_v2i64_oddk_denom(<2 x i64> %num) {
-; GISEL-LABEL: v_udiv_v2i64_oddk_denom:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT: s_mov_b32 s12, 0x12d8fb
-; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s12
-; GISEL-NEXT: s_sub_u32 s8, 0, s12
-; GISEL-NEXT: s_cselect_b32 s4, 1, 0
-; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
-; GISEL-NEXT: v_mov_b32_e32 v6, v4
-; GISEL-NEXT: s_and_b32 s4, s4, 1
-; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
-; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v6
-; GISEL-NEXT: s_cmp_lg_u32 s4, 0
-; GISEL-NEXT: s_subb_u32 s9, 0, 0
-; GISEL-NEXT: s_bfe_i32 s10, -1, 0x10000
-; GISEL-NEXT: s_bfe_i32 s11, -1, 0x10000
-; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4
-; GISEL-NEXT: s_sub_u32 s13, 0, s12
-; GISEL-NEXT: s_cselect_b32 s4, 1, 0
-; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5
-; GISEL-NEXT: v_trunc_f32_e32 v6, v6
-; GISEL-NEXT: s_and_b32 s4, s4, 1
-; GISEL-NEXT: v_trunc_f32_e32 v7, v7
-; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6
-; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
-; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7
-; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT: s_cmp_lg_u32 s4, 0
-; GISEL-NEXT: s_subb_u32 s6, 0, 0
-; GISEL-NEXT: v_mul_lo_u32 v8, s13, v6
-; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT: v_mul_lo_u32 v9, s8, v7
-; GISEL-NEXT: v_mul_lo_u32 v10, s13, v4
-; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4
-; GISEL-NEXT: v_mul_hi_u32 v12, s13, v4
-; GISEL-NEXT: v_mul_lo_u32 v13, s8, v5
-; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5
-; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8
-; GISEL-NEXT: v_mul_lo_u32 v11, v6, v10
-; GISEL-NEXT: v_mul_hi_u32 v16, v4, v10
-; GISEL-NEXT: v_mul_hi_u32 v10, v6, v10
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9
-; GISEL-NEXT: v_mul_lo_u32 v14, v7, v13
-; GISEL-NEXT: v_mul_hi_u32 v17, v5, v13
-; GISEL-NEXT: v_mul_hi_u32 v13, v7, v13
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15
-; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8
-; GISEL-NEXT: v_mul_lo_u32 v15, v6, v8
-; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8
-; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8
-; GISEL-NEXT: v_mul_lo_u32 v19, v5, v9
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v19
-; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17
-; GISEL-NEXT: v_mul_lo_u32 v14, v7, v9
-; GISEL-NEXT: v_mul_hi_u32 v17, v5, v9
-; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9
-; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18
-; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v16
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v18
-; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v15
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
-; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
-; GISEL-NEXT: v_mul_lo_u32 v11, s13, v4
-; GISEL-NEXT: v_mul_lo_u32 v12, s6, v4
-; GISEL-NEXT: v_mul_hi_u32 v14, s13, v4
-; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13
-; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
-; GISEL-NEXT: v_mul_lo_u32 v15, s8, v5
-; GISEL-NEXT: v_mul_lo_u32 v16, s9, v5
-; GISEL-NEXT: v_mul_hi_u32 v17, s8, v5
-; GISEL-NEXT: v_mul_lo_u32 v18, s8, v13
-; GISEL-NEXT: v_mul_lo_u32 v19, v13, v15
-; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18
-; GISEL-NEXT: v_mul_hi_u32 v18, v5, v15
-; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v17
-; GISEL-NEXT: v_mul_lo_u32 v17, v5, v16
-; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17
-; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7]
-; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], v17, v18
-; GISEL-NEXT: v_mul_lo_u32 v17, s13, v10
-; GISEL-NEXT: v_mul_lo_u32 v18, v10, v11
-; GISEL-NEXT: v_add_i32_e64 v12, s[8:9], v12, v17
-; GISEL-NEXT: v_mul_hi_u32 v17, v4, v11
-; GISEL-NEXT: v_add_i32_e64 v12, s[8:9], v12, v14
-; GISEL-NEXT: v_mul_lo_u32 v14, v4, v12
-; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v18, v14
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9]
-; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v14, v17
-; GISEL-NEXT: v_mov_b32_e32 v14, s10
-; GISEL-NEXT: v_mov_b32_e32 v17, s11
-; GISEL-NEXT: s_bfe_i32 s13, -1, 0x10000
-; GISEL-NEXT: s_bfe_i32 s14, -1, 0x10000
-; GISEL-NEXT: v_add_i32_e64 v6, s[10:11], v6, v8
-; GISEL-NEXT: v_mov_b32_e32 v8, s13
-; GISEL-NEXT: v_add_i32_e64 v7, s[10:11], v7, v9
-; GISEL-NEXT: v_mul_hi_u32 v9, v10, v11
-; GISEL-NEXT: v_mul_hi_u32 v11, v13, v15
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9]
-; GISEL-NEXT: v_add_i32_e64 v15, s[8:9], v18, v15
-; GISEL-NEXT: v_mul_lo_u32 v18, v10, v12
-; GISEL-NEXT: v_mul_hi_u32 v10, v10, v12
-; GISEL-NEXT: v_mul_hi_u32 v12, v4, v12
-; GISEL-NEXT: v_add_i32_e64 v9, s[8:9], v18, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9]
-; GISEL-NEXT: v_add_i32_e64 v9, s[8:9], v9, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9]
-; GISEL-NEXT: v_add_i32_e64 v12, s[8:9], v18, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v19, v18
-; GISEL-NEXT: v_mul_lo_u32 v19, v13, v16
-; GISEL-NEXT: v_mul_hi_u32 v13, v13, v16
-; GISEL-NEXT: v_mul_hi_u32 v16, v5, v16
-; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v19, v11
-; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7]
-; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16
-; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7]
-; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v19, v16
-; GISEL-NEXT: v_mov_b32_e32 v19, s14
-; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7]
-; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v18
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v12, v15
-; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v18
-; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12
-; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v13, v15
-; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc
-; GISEL-NEXT: v_addc_u32_e64 v7, vcc, v7, v12, s[4:5]
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc
-; GISEL-NEXT: v_mul_lo_u32 v9, v3, v4
-; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4
-; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v11
-; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc
-; GISEL-NEXT: v_mul_lo_u32 v11, v1, v5
-; GISEL-NEXT: v_mul_hi_u32 v12, v0, v5
-; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
-; GISEL-NEXT: v_mul_lo_u32 v13, v2, v6
-; GISEL-NEXT: v_mul_lo_u32 v15, v3, v6
-; GISEL-NEXT: v_mul_hi_u32 v16, v2, v6
-; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6
-; GISEL-NEXT: v_mul_lo_u32 v18, v0, v7
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18
-; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
-; GISEL-NEXT: v_mul_lo_u32 v11, v1, v7
-; GISEL-NEXT: v_mul_hi_u32 v12, v0, v7
-; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
-; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v15, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v11, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v10
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5]
-; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v16
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v16
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v13
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; GISEL-NEXT: v_mul_lo_u32 v10, s12, v4
-; GISEL-NEXT: v_mul_lo_u32 v13, 0, v4
-; GISEL-NEXT: v_mul_hi_u32 v15, s12, v4
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
-; GISEL-NEXT: v_mul_lo_u32 v12, s12, v5
-; GISEL-NEXT: v_mul_lo_u32 v16, 0, v5
-; GISEL-NEXT: v_mul_hi_u32 v18, s12, v5
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9
-; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11
-; GISEL-NEXT: v_mul_lo_u32 v9, s12, v6
-; GISEL-NEXT: v_mul_lo_u32 v11, s12, v7
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v4
-; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v6, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18
-; GISEL-NEXT: v_add_i32_e32 v15, vcc, 1, v5
-; GISEL-NEXT: v_addc_u32_e32 v18, vcc, 0, v7, vcc
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10
-; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v3, v9, vcc
-; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v9
-; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s12, v2
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v12
-; GISEL-NEXT: v_subb_u32_e64 v12, s[6:7], v1, v11, s[4:5]
-; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v11
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v10
-; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], 1, v13
-; GISEL-NEXT: v_addc_u32_e64 v11, s[8:9], 0, v16, s[8:9]
-; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[6:7]
-; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s12, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v9, v14, v9, s[6:7]
-; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], 1, v15
-; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], 0, v18, s[6:7]
-; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
-; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s12, v2
-; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s12, v2
-; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
-; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s12, v0
-; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s12, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v19, v2, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v17, v0, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v13, v10, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v15, v12, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v16, v11, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v1, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v18, v14, s[4:5]
-; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
-; GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_udiv_v2i64_oddk_denom:
-; CGP: ; %bb.0:
-; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb
-; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
-; CGP-NEXT: s_mov_b32 s8, 0xffed2705
-; CGP-NEXT: s_mov_b32 s12, 0x12d8fb
-; CGP-NEXT: s_bfe_i32 s10, -1, 0x10000
-; CGP-NEXT: s_bfe_i32 s11, -1, 0x10000
-; CGP-NEXT: s_bfe_i32 s13, -1, 0x10000
-; CGP-NEXT: s_bfe_i32 s14, -1, 0x10000
-; CGP-NEXT: v_mov_b32_e32 v6, v4
-; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
-; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5
-; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6
-; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4
-; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5
-; CGP-NEXT: v_trunc_f32_e32 v6, v6
-; CGP-NEXT: v_trunc_f32_e32 v7, v7
-; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6
-; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
-; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7
-; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
-; CGP-NEXT: v_mul_lo_u32 v8, s8, v6
-; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
-; CGP-NEXT: v_mul_lo_u32 v9, s8, v7
-; CGP-NEXT: v_mul_lo_u32 v10, s8, v4
-; CGP-NEXT: v_mul_lo_u32 v11, -1, v4
-; CGP-NEXT: v_mul_hi_u32 v12, s8, v4
-; CGP-NEXT: v_mul_lo_u32 v13, s8, v5
-; CGP-NEXT: v_mul_lo_u32 v14, -1, v5
-; CGP-NEXT: v_mul_hi_u32 v15, s8, v5
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8
-; CGP-NEXT: v_mul_lo_u32 v11, v6, v10
-; CGP-NEXT: v_mul_hi_u32 v16, v4, v10
-; CGP-NEXT: v_mul_hi_u32 v10, v6, v10
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v14, v9
-; CGP-NEXT: v_mul_lo_u32 v14, v7, v13
-; CGP-NEXT: v_mul_hi_u32 v17, v5, v13
-; CGP-NEXT: v_mul_hi_u32 v13, v7, v13
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15
-; CGP-NEXT: v_mul_lo_u32 v12, v4, v8
-; CGP-NEXT: v_mul_lo_u32 v15, v6, v8
-; CGP-NEXT: v_mul_hi_u32 v18, v4, v8
-; CGP-NEXT: v_mul_hi_u32 v8, v6, v8
-; CGP-NEXT: v_mul_lo_u32 v19, v5, v9
-; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19
-; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17
-; CGP-NEXT: v_mul_lo_u32 v14, v7, v9
-; CGP-NEXT: v_mul_hi_u32 v17, v5, v9
-; CGP-NEXT: v_mul_hi_u32 v9, v7, v9
-; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13
-; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18
-; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17
-; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v16
-; CGP-NEXT: v_add_i32_e32 v15, vcc, v19, v18
-; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v15
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10
-; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
-; CGP-NEXT: v_mul_lo_u32 v11, s8, v4
-; CGP-NEXT: v_mul_lo_u32 v12, -1, v4
-; CGP-NEXT: v_mul_hi_u32 v14, s8, v4
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13
-; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
-; CGP-NEXT: v_mul_lo_u32 v15, s8, v5
-; CGP-NEXT: v_mul_lo_u32 v16, -1, v5
-; CGP-NEXT: v_mul_hi_u32 v17, s8, v5
-; CGP-NEXT: v_mul_lo_u32 v18, s8, v13
-; CGP-NEXT: v_mul_lo_u32 v19, v13, v15
-; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18
-; CGP-NEXT: v_mul_hi_u32 v18, v5, v15
-; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v17
-; CGP-NEXT: v_mul_lo_u32 v17, v5, v16
-; CGP-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17
-; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7]
-; CGP-NEXT: v_add_i32_e64 v17, s[6:7], v17, v18
-; CGP-NEXT: v_mul_lo_u32 v17, s8, v10
-; CGP-NEXT: v_mul_lo_u32 v18, v10, v11
-; CGP-NEXT: v_add_i32_e64 v12, s[8:9], v12, v17
-; CGP-NEXT: v_mul_hi_u32 v17, v4, v11
-; CGP-NEXT: v_add_i32_e64 v12, s[8:9], v12, v14
-; CGP-NEXT: v_mul_lo_u32 v14, v4, v12
-; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v18, v14
-; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9]
-; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v14, v17
-; CGP-NEXT: v_mov_b32_e32 v14, s10
-; CGP-NEXT: v_mov_b32_e32 v17, s11
-; CGP-NEXT: v_add_i32_e64 v6, s[10:11], v6, v8
-; CGP-NEXT: v_mov_b32_e32 v8, s13
-; CGP-NEXT: v_add_i32_e64 v7, s[10:11], v7, v9
-; CGP-NEXT: v_mul_hi_u32 v9, v10, v11
-; CGP-NEXT: v_mul_hi_u32 v11, v13, v15
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9]
-; CGP-NEXT: v_add_i32_e64 v15, s[8:9], v18, v15
-; CGP-NEXT: v_mul_lo_u32 v18, v10, v12
-; CGP-NEXT: v_mul_hi_u32 v10, v10, v12
-; CGP-NEXT: v_mul_hi_u32 v12, v4, v12
-; CGP-NEXT: v_add_i32_e64 v9, s[8:9], v18, v9
-; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9]
-; CGP-NEXT: v_add_i32_e64 v9, s[8:9], v9, v12
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9]
-; CGP-NEXT: v_add_i32_e64 v12, s[8:9], v18, v12
-; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; CGP-NEXT: v_add_i32_e64 v18, s[6:7], v19, v18
-; CGP-NEXT: v_mul_lo_u32 v19, v13, v16
-; CGP-NEXT: v_mul_hi_u32 v13, v13, v16
-; CGP-NEXT: v_mul_hi_u32 v16, v5, v16
-; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v19, v11
-; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7]
-; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16
-; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7]
-; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v19, v16
-; CGP-NEXT: v_mov_b32_e32 v19, s14
-; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7]
-; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v18
-; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7]
-; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v12, v15
-; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v16, v18
-; CGP-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12
-; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v13, v15
-; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc
-; CGP-NEXT: v_addc_u32_e64 v7, vcc, v7, v12, s[4:5]
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc
-; CGP-NEXT: v_mul_lo_u32 v9, v3, v4
-; CGP-NEXT: v_mul_hi_u32 v10, v2, v4
-; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11
-; CGP-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc
-; CGP-NEXT: v_mul_lo_u32 v11, v1, v5
-; CGP-NEXT: v_mul_hi_u32 v12, v0, v5
-; CGP-NEXT: v_mul_hi_u32 v5, v1, v5
-; CGP-NEXT: v_mul_lo_u32 v13, v2, v6
-; CGP-NEXT: v_mul_lo_u32 v15, v3, v6
-; CGP-NEXT: v_mul_hi_u32 v16, v2, v6
-; CGP-NEXT: v_mul_hi_u32 v6, v3, v6
-; CGP-NEXT: v_mul_lo_u32 v18, v0, v7
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v18
-; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT: v_mul_lo_u32 v11, v1, v7
-; CGP-NEXT: v_mul_hi_u32 v12, v0, v7
-; CGP-NEXT: v_mul_hi_u32 v7, v1, v7
-; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13
-; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v15, v4
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v11, v5
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v9, v10
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v16
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v15, v10
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v18, v16
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT: v_mul_lo_u32 v10, s12, v4
-; CGP-NEXT: v_mul_lo_u32 v13, 0, v4
-; CGP-NEXT: v_mul_hi_u32 v15, s12, v4
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT: v_mul_lo_u32 v12, s12, v5
-; CGP-NEXT: v_mul_lo_u32 v16, 0, v5
-; CGP-NEXT: v_mul_hi_u32 v18, s12, v5
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v9
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11
-; CGP-NEXT: v_mul_lo_u32 v9, s12, v6
-; CGP-NEXT: v_mul_lo_u32 v11, s12, v7
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v16, v11
-; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v4
-; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v6, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v18
-; CGP-NEXT: v_add_i32_e32 v15, vcc, 1, v5
-; CGP-NEXT: v_addc_u32_e32 v18, vcc, 0, v7, vcc
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10
-; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v3, v9, vcc
-; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v9
-; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s12, v2
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v12
-; CGP-NEXT: v_subb_u32_e64 v12, s[6:7], v1, v11, s[4:5]
-; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v11
-; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v10
-; CGP-NEXT: v_add_i32_e64 v10, s[8:9], 1, v13
-; CGP-NEXT: v_addc_u32_e64 v11, s[8:9], 0, v16, s[8:9]
-; CGP-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[6:7]
-; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], s12, v0
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7]
-; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v12
-; CGP-NEXT: v_cndmask_b32_e64 v9, v14, v9, s[6:7]
-; CGP-NEXT: v_add_i32_e64 v12, s[6:7], 1, v15
-; CGP-NEXT: v_addc_u32_e64 v14, s[6:7], 0, v18, s[6:7]
-; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; CGP-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
-; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s12, v2
-; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s12, v2
-; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
-; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s12, v0
-; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s12, v0
-; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; CGP-NEXT: v_cndmask_b32_e32 v2, v19, v2, vcc
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CGP-NEXT: v_cndmask_b32_e32 v0, v17, v0, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
-; CGP-NEXT: v_cndmask_b32_e32 v1, v13, v10, vcc
-; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0
-; CGP-NEXT: v_cndmask_b32_e64 v0, v15, v12, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v3, v16, v11, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
-; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v1, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v1, v18, v14, s[4:5]
-; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v9
-; CGP-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
-; CGP-NEXT: s_setpc_b64 s[30:31]
+; CHECK-LABEL: v_udiv_v2i64_oddk_denom:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_mov_b32 s4, 0x1fb03c31
+; CHECK-NEXT: s_mov_b32 s5, 0xd9528440
+; CHECK-NEXT: v_mul_lo_u32 v4, v1, s4
+; CHECK-NEXT: v_mul_lo_u32 v5, v0, s5
+; CHECK-NEXT: v_mul_hi_u32 v6, v0, s4
+; CHECK-NEXT: v_mul_lo_u32 v7, v1, s5
+; CHECK-NEXT: v_mul_hi_u32 v8, v1, s4
+; CHECK-NEXT: v_mul_hi_u32 v0, v0, s5
+; CHECK-NEXT: v_mul_hi_u32 v1, v1, s5
+; CHECK-NEXT: v_mul_lo_u32 v9, v3, s4
+; CHECK-NEXT: v_mul_lo_u32 v10, v2, s5
+; CHECK-NEXT: v_mul_hi_u32 v11, v2, s4
+; CHECK-NEXT: v_mul_lo_u32 v12, v3, s5
+; CHECK-NEXT: v_mul_hi_u32 v13, v3, s4
+; CHECK-NEXT: v_mul_hi_u32 v2, v2, s5
+; CHECK-NEXT: v_mul_hi_u32 v3, v3, s5
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8
+; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v12, vcc, v12, v13
+; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6
+; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0
+; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v11
+; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v12, v2
+; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v6
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v7
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v13, v9
+; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
+; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v6
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v4
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5
+; CHECK-NEXT: v_lshr_b64 v[0:1], v[0:1], 20
+; CHECK-NEXT: v_lshr_b64 v[2:3], v[2:3], 20
+; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = udiv <2 x i64> %num, <i64 1235195, i64 1235195>
ret <2 x i64> %result
}
More information about the llvm-commits
mailing list