[llvm] [GlobalIsel] Transform build_vector(binop(_, C), ...) -> binop(bv, constant bv) (PR #73577)

Thorsten Schütt via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 27 14:27:49 PST 2023


https://github.com/tschuett created https://github.com/llvm/llvm-project/pull/73577

test plan: ninja check-llvm-codegen

Port of https://github.com/llvm/llvm-project/pull/67358

The nearly duplicated code is caused by the distinction between integers and floats.

Transforms a build vector of identical binops whose RHS is always a constant into a binop of two build vectors, the second of which is a constant build vector. Instead of O(n) scalar binops, we use one vector binop plus one constant build vector. The constant build vector might trigger further combines.

Differences: constants are not immediates but G_CONSTANT resp. G_FCONSTANT. Thus the hit rate may be higher, e.g., the constant might be hidden behind a COPY.

Credits: @preames

>From a9e1fb30e78126f20d113c9c706af274e15eca8c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Mon, 27 Nov 2023 18:56:08 +0100
Subject: [PATCH] Transform build_vector(binop(_, C), ...) -> binop(bv,
 constant bv)

test plan: ninja check-llvm-codegen

Port of https://github.com/llvm/llvm-project/pull/67358

The nearly duplicated code is caused by the distinction between
integers and floats.

Transforms a build vector of identical binops whose RHS is always
a constant into a binop of two build vectors, the second of which is
a constant build vector. Instead of O(n) scalar binops, we use one
vector binop plus one constant build vector. The constant build vector
might trigger further combines.

Differences: constants are not immediates but G_CONSTANT
resp. G_FCONSTANT. Thus the hit rate may be higher, e.g., the constant
might be hidden behind a COPY.

Credits: @preames
---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |    3 +
 .../CodeGen/GlobalISel/GenericMachineInstrs.h |   54 +
 .../CodeGen/GlobalISel/MachineIRBuilder.h     |    5 +
 .../include/llvm/Target/GlobalISel/Combine.td |    8 +-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |   85 +
 .../CodeGen/GlobalISel/MachineIRBuilder.cpp   |   11 +
 .../combine-build-vector-to-binop.mir         |  236 +++
 .../AArch64/GlobalISel/inline-memset.mir      |  166 +-
 .../CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll     |  270 ++--
 .../CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll     | 1420 ++++++-----------
 .../CodeGen/AMDGPU/GlobalISel/srem.i32.ll     |  246 +--
 .../CodeGen/AMDGPU/GlobalISel/srem.i64.ll     | 1396 ++++++----------
 .../CodeGen/AMDGPU/GlobalISel/urem.i32.ll     |   93 +-
 .../CodeGen/AMDGPU/GlobalISel/urem.i64.ll     |  628 +++-----
 llvm/test/CodeGen/AMDGPU/llvm.log.ll          |  343 ++--
 llvm/test/CodeGen/AMDGPU/llvm.log10.ll        |  343 ++--
 llvm/test/CodeGen/AMDGPU/v_pack.ll            |    6 +-
 llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll   |   73 +-
 18 files changed, 2340 insertions(+), 3046 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector-to-binop.mir

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index ba72a3b71ffd70b..1eb9f5816bdb036 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -812,6 +812,9 @@ class CombinerHelper {
   // Given a binop \p MI, commute operands 1 and 2.
   void applyCommuteBinOpOperands(MachineInstr &MI);
 
+  /// Transform build_vector (binop(_, C), ...) -> binop(bv, constant bv)
+  bool matchBuildVectorToBinOp(MachineInstr &MI, BuildFnTy &MatchInfo);
+
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
   bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 6ab1d4550c51ca9..26742496327e912 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -558,6 +558,60 @@ class GVecReduce : public GenericMachineInstr {
   }
 };
 
+// Represents an integer binary operation.
+class GIBinOp : public GenericMachineInstr {
+public:
+  Register getLHSReg() const { return getReg(1); }
+  Register getRHSReg() const { return getReg(2); }
+
+  static bool classof(const MachineInstr *MI) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_ADD:
+    case TargetOpcode::G_SUB:
+    case TargetOpcode::G_MUL:
+    case TargetOpcode::G_SDIV:
+    case TargetOpcode::G_UDIV:
+    case TargetOpcode::G_SREM:
+    case TargetOpcode::G_UREM:
+    case TargetOpcode::G_AND:
+    case TargetOpcode::G_OR:
+    case TargetOpcode::G_XOR:
+    case TargetOpcode::G_SMIN:
+    case TargetOpcode::G_SMAX:
+    case TargetOpcode::G_UMIN:
+    case TargetOpcode::G_UMAX:
+      return true;
+    default:
+      return false;
+    }
+  };
+};
+
+// Represents a floating point binary operation.
+class GFBinOp : public GenericMachineInstr {
+public:
+  Register getLHSReg() const { return getReg(1); }
+  Register getRHSReg() const { return getReg(2); }
+
+  static bool classof(const MachineInstr *MI) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_FMINNUM:
+    case TargetOpcode::G_FMAXNUM:
+    case TargetOpcode::G_FMINNUM_IEEE:
+    case TargetOpcode::G_FMAXNUM_IEEE:
+    case TargetOpcode::G_FMINIMUM:
+    case TargetOpcode::G_FMAXIMUM:
+    case TargetOpcode::G_FADD:
+    case TargetOpcode::G_FSUB:
+    case TargetOpcode::G_FMUL:
+    case TargetOpcode::G_FDIV:
+    case TargetOpcode::G_FPOW:
+      return true;
+    default:
+      return false;
+    }
+  };
+};
 
 } // namespace llvm
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index e0101a5ac1ca804..94da5f5e0d14b21 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1061,6 +1061,11 @@ class MachineIRBuilder {
   MachineInstrBuilder buildBuildVectorConstant(const DstOp &Res,
                                                ArrayRef<APInt> Ops);
 
+  /// Build and insert \p Res = G_BUILD_VECTOR \p Op0, ... where each OpN is
+  /// built with G_FCONSTANT.
+  MachineInstrBuilder buildBuildVectorConstant(const DstOp &Res,
+                                               ArrayRef<APFloat> Ops);
+
   /// Build and insert \p Res = G_BUILD_VECTOR with \p Src replicated to fill
   /// the number of elements
   MachineInstrBuilder buildSplatVector(const DstOp &Res,
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 9a84ab80157f35f..a56327ff49696b5 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1236,6 +1236,12 @@ def select_to_minmax: GICombineRule<
          [{ return Helper.matchSimplifySelectToMinMax(*${root}, ${info}); }]),
   (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
 
+def build_vector_to_binop : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_BUILD_VECTOR):$root,
+        [{ return Helper.matchBuildVectorToBinOp(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -1309,7 +1315,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     intdiv_combines, mulh_combines, redundant_neg_operands,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
     sub_add_reg, select_to_minmax, redundant_binop_in_equality,
-    fsub_to_fneg, commute_constant_to_rhs]>;
+    fsub_to_fneg, commute_constant_to_rhs, build_vector_to_binop]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index c2a7c2d01188129..a0b2e86c6148cff 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6224,3 +6224,88 @@ void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
   MI.getOperand(2).setReg(LHSReg);
   Observer.changedInstr(MI);
 }
+
+// Transform build_vector of binop(_,C) -> binop(BV, constant BV).
+bool CombinerHelper::matchBuildVectorToBinOp(MachineInstr &MI,
+                                             BuildFnTy &MatchInfo) {
+  GBuildVector *BuildVector = cast<GBuildVector>(&MI);
+  Register Dst = BuildVector->getReg(0);
+
+  unsigned NumOfSources = BuildVector->getNumSources();
+  if (NumOfSources == 1)
+    return false;
+
+  LLT ElementTy = MRI.getType(BuildVector->getSourceReg(0));
+  LLT BVTy = MRI.getType(BuildVector->getReg(0));
+
+  MachineInstr *FirstDef = MRI.getVRegDef(BuildVector->getSourceReg(0));
+  const unsigned Opcode = FirstDef->getOpcode();
+  SmallVector<Register> LHS;
+
+  if (isa<GIBinOp>(FirstDef)) {
+    SmallVector<APInt> RHS;
+
+    // Collect registers and constants and check for non-conformance.
+    for (unsigned I = 0; I < NumOfSources; ++I) {
+      // Check for integer binop of the same kind.
+      GIBinOp *BinOp = getOpcodeDef<GIBinOp>(BuildVector->getSourceReg(I), MRI);
+      if (!BinOp || BinOp->getOpcode() != Opcode)
+        return false;
+      // Check for constant on rhs.
+      auto IConstant =
+          getIConstantVRegValWithLookThrough(BinOp->getRHSReg(), MRI);
+      if (!IConstant)
+        return false;
+      LHS.push_back(BinOp->getLHSReg());
+      RHS.push_back(IConstant->Value);
+    }
+
+    if (!isLegalOrBeforeLegalizer({Opcode, {BVTy, ElementTy}}))
+      return false;
+
+    // Binop of build_vector, integer constant build_vector.
+    MatchInfo = [=](MachineIRBuilder &B) {
+      B.setInstrAndDebugLoc(*BuildVector);
+      Register First = MRI.createGenericVirtualRegister(BVTy);
+      Register Second = MRI.createGenericVirtualRegister(BVTy);
+      B.buildBuildVector(First, LHS);
+      B.buildBuildVectorConstant(Second, RHS);
+      B.buildInstr(Opcode, {Dst}, {First, Second});
+    };
+    return true;
+  } else if (isa<GFBinOp>(FirstDef)) {
+    SmallVector<APFloat> RHS;
+
+    // Collect registers and constants and check for non-conformance.
+    for (unsigned I = 0; I < NumOfSources; ++I) {
+      // Check for float binop of the same kind.
+      GFBinOp *BinOp = getOpcodeDef<GFBinOp>(BuildVector->getSourceReg(I), MRI);
+      if (!BinOp || BinOp->getOpcode() != Opcode)
+        return false;
+      // Check for float constant on rhs.
+      auto FConstant =
+          getFConstantVRegValWithLookThrough(BinOp->getRHSReg(), MRI);
+      if (!FConstant)
+        return false;
+      LHS.push_back(BinOp->getLHSReg());
+      RHS.push_back(FConstant->Value);
+    }
+
+    if (!isLegalOrBeforeLegalizer({Opcode, {BVTy, ElementTy}}))
+      return false;
+
+    // Binop of build_vector, floating point constant build_vector.
+    MatchInfo = [=](MachineIRBuilder &B) {
+      B.setInstrAndDebugLoc(*BuildVector);
+      Register First = MRI.createGenericVirtualRegister(BVTy);
+      Register Second = MRI.createGenericVirtualRegister(BVTy);
+      B.buildBuildVector(First, LHS);
+      B.buildBuildVectorConstant(Second, RHS);
+      B.buildInstr(Opcode, {Dst}, {First, Second});
+    };
+    return true;
+  } else {
+    // Not a binop.
+    return false;
+  }
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 80e9c08e850b683..89d5d73a2394a39 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -700,6 +700,17 @@ MachineIRBuilder::buildBuildVectorConstant(const DstOp &Res,
   return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
 }
 
+MachineInstrBuilder
+MachineIRBuilder::buildBuildVectorConstant(const DstOp &Res,
+                                           ArrayRef<APFloat> Ops) {
+  SmallVector<SrcOp> TmpVec;
+  TmpVec.reserve(Ops.size());
+  LLT EltTy = Res.getLLTTy(*getMRI()).getElementType();
+  for (const auto &Op : Ops)
+    TmpVec.push_back(buildFConstant(EltTy, Op));
+  return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
+}
+
 MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res,
                                                        const SrcOp &Src) {
   SmallVector<SrcOp, 8> TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector-to-binop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector-to-binop.mir
new file mode 100644
index 000000000000000..ea69bd4a9be2d5f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector-to-binop.mir
@@ -0,0 +1,236 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
+
+---
+name:            build_vector_success_add
+liveins:
+body:             |
+  bb.0:
+
+    ; CHECK-LABEL: name: build_vector_success_add
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32)
+    ; CHECK-NEXT: %bv:_(<2 x s32>) = G_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), [[COPY]](s32)
+    ; CHECK-NEXT: %extract2:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), [[COPY1]](s32)
+    ; CHECK-NEXT: $w0 = COPY %extract(s32)
+    ; CHECK-NEXT: $w1 = COPY %extract2(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %2:_(s32) = G_CONSTANT i32 10
+    %3:_(s32) = G_CONSTANT i32 1
+    %4:_(s32) = G_ADD %0, %2
+    %5:_(s32) = G_ADD %1, %3
+    %bv:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32)
+    %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %0(s32)
+    %extract2:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %1(s32)
+    $w0 = COPY %extract(s32)
+    $w1 = COPY %extract2(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            build_vector_failure_mixed
+liveins:
+body:             |
+  bb.0:
+
+    ; CHECK-LABEL: name: build_vector_failure_mixed
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C]]
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C1]]
+    ; CHECK-NEXT: %bv:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[SUB]](s32)
+    ; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), [[COPY]](s32)
+    ; CHECK-NEXT: %extract2:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), [[COPY1]](s32)
+    ; CHECK-NEXT: $w0 = COPY %extract(s32)
+    ; CHECK-NEXT: $w1 = COPY %extract2(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %2:_(s32) = G_CONSTANT i32 5
+    %3:_(s32) = G_CONSTANT i32 11
+    %4:_(s32) = G_ADD %0, %2
+    %5:_(s32) = G_SUB %1, %3
+    %bv:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32)
+    %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %0(s32)
+    %extract2:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %1(s32)
+    $w0 = COPY %extract(s32)
+    $w1 = COPY %extract2(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            build_vector_success_sub
+liveins:
+body:             |
+  bb.0:
+
+    ; CHECK-LABEL: name: build_vector_success_sub
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32)
+    ; CHECK-NEXT: %bv:_(<4 x s32>) = G_SUB [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), [[COPY3]](s32)
+    ; CHECK-NEXT: %extract2:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), [[COPY]](s32)
+    ; CHECK-NEXT: $w0 = COPY %extract(s32)
+    ; CHECK-NEXT: $w1 = COPY %extract2(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %2:_(s32) = COPY $w2
+    %3:_(s32) = COPY $w3
+    %4:_(s32) = G_CONSTANT i32 10
+    %5:_(s32) = G_CONSTANT i32 1
+    %6:_(s32) = G_CONSTANT i32 2
+    %7:_(s32) = G_CONSTANT i32 3
+    %8:_(s32) = G_SUB %0, %4
+    %9:_(s32) = G_SUB %1, %5
+    %10:_(s32) = G_SUB %2, %6
+    %11:_(s32) = G_SUB %3, %7
+    %bv:_(<4 x s32>) = G_BUILD_VECTOR %8(s32), %9(s32), %10(s32), %11(s32)
+    %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), %3(s32)
+    %extract2:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), %0(s32)
+    $w0 = COPY %extract(s32)
+    $w1 = COPY %extract2(s32)
+    RET_ReallyLR implicit $w0
+
+
+...
+---
+name:            build_vector_success_fadd
+liveins:
+body:             |
+  bb.0:
+
+    ; CHECK-LABEL: name: build_vector_success_fadd
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+01
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.100000e+01
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.200000e+01
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.400000e+01
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32)
+    ; CHECK-NEXT: %bv:_(<4 x s32>) = G_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), [[COPY3]](s32)
+    ; CHECK-NEXT: %extract2:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), [[COPY]](s32)
+    ; CHECK-NEXT: $w0 = COPY %extract(s32)
+    ; CHECK-NEXT: $w1 = COPY %extract2(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %2:_(s32) = COPY $w2
+    %3:_(s32) = COPY $w3
+    %4:_(s32) = G_FCONSTANT float 10.000000e+00
+    %5:_(s32) = G_FCONSTANT float 11.000000e+00
+    %6:_(s32) = G_FCONSTANT float 12.000000e+00
+    %7:_(s32) = G_FCONSTANT float 14.000000e+00
+    %8:_(s32) = G_FADD %0, %4
+    %9:_(s32) = G_FADD %1, %5
+    %10:_(s32) = G_FADD %2, %6
+    %11:_(s32) = G_FADD %3, %7
+    %bv:_(<4 x s32>) = G_BUILD_VECTOR %8(s32), %9(s32), %10(s32), %11(s32)
+    %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), %3(s32)
+    %extract2:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), %0(s32)
+    $w0 = COPY %extract(s32)
+    $w1 = COPY %extract2(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            build_vector_failed_no_constant_for_you
+liveins:
+body:             |
+  bb.0:
+
+    ; CHECK-LABEL: name: build_vector_failed_no_constant_for_you
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+01
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.100000e+01
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.200000e+01
+    ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[C]]
+    ; CHECK-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[COPY1]], [[C1]]
+    ; CHECK-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[C2]]
+    ; CHECK-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[COPY3]], [[COPY]]
+    ; CHECK-NEXT: %bv:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32)
+    ; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), [[COPY3]](s32)
+    ; CHECK-NEXT: %extract2:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), [[COPY]](s32)
+    ; CHECK-NEXT: $w0 = COPY %extract(s32)
+    ; CHECK-NEXT: $w1 = COPY %extract2(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %2:_(s32) = COPY $w2
+    %3:_(s32) = COPY $w3
+    %4:_(s32) = G_FCONSTANT float 10.000000e+00
+    %5:_(s32) = G_FCONSTANT float 11.000000e+00
+    %6:_(s32) = G_FCONSTANT float 12.000000e+00
+    %8:_(s32) = G_FADD %0, %4
+    %9:_(s32) = G_FADD %1, %5
+    %10:_(s32) = G_FADD %2, %6
+    %11:_(s32) = G_FADD %3, %0
+    %bv:_(<4 x s32>) = G_BUILD_VECTOR %8(s32), %9(s32), %10(s32), %11(s32)
+    %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), %3(s32)
+    %extract2:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), %0(s32)
+    $w0 = COPY %extract(s32)
+    $w1 = COPY %extract2(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            build_vector_failed_no_binop
+liveins:
+body:             |
+  bb.0:
+
+    ; CHECK-LABEL: name: build_vector_failed_no_binop
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+    ; CHECK-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[COPY2]]
+    ; CHECK-NEXT: [[FNEG3:%[0-9]+]]:_(s32) = G_FNEG [[COPY3]]
+    ; CHECK-NEXT: %bv:_(<4 x s32>) = G_BUILD_VECTOR [[FNEG]](s32), [[FNEG1]](s32), [[FNEG2]](s32), [[FNEG3]](s32)
+    ; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), [[COPY3]](s32)
+    ; CHECK-NEXT: %extract2:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), [[COPY]](s32)
+    ; CHECK-NEXT: $w0 = COPY %extract(s32)
+    ; CHECK-NEXT: $w1 = COPY %extract2(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %2:_(s32) = COPY $w2
+    %3:_(s32) = COPY $w3
+    %8:_(s32) = G_FNEG %0
+    %9:_(s32) = G_FNEG %1
+    %10:_(s32) = G_FNEG %2
+    %11:_(s32) = G_FNEG %3
+    %bv:_(<4 x s32>) = G_BUILD_VECTOR %8(s32), %9(s32), %10(s32), %11(s32)
+    %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), %3(s32)
+    %extract2:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), %0(s32)
+    $w0 = COPY %extract(s32)
+    $w1 = COPY %extract2(s32)
+    RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir
index a02fb08342c1658..02aeb843e20ba27 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir
@@ -64,13 +64,14 @@ body:             |
 
     ; CHECK-LABEL: name: test_ms1
     ; CHECK: liveins: $w1, $w2, $x0
-    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32)
-    ; CHECK: G_MEMSET [[COPY]](p0), [[TRUNC]](s8), [[ZEXT]](s64), 1 :: (store (s8) into %ir.dst)
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32)
+    ; CHECK-NEXT: G_MEMSET [[COPY]](p0), [[TRUNC]](s8), [[ZEXT]](s64), 1 :: (store (s8) into %ir.dst)
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(p0) = COPY $x0
     %1:_(s32) = COPY $w1
     %2:_(s32) = COPY $w2
@@ -90,17 +91,18 @@ body:             |
 
     ; CHECK-LABEL: name: test_ms2_const
     ; CHECK: liveins: $w1, $x0
-    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s8)
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673
-    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
-    ; CHECK: G_STORE [[MUL]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CHECK: G_STORE [[MUL]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1)
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s8)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673
+    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
+    ; CHECK-NEXT: G_STORE [[MUL]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CHECK-NEXT: G_STORE [[MUL]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1)
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(p0) = COPY $x0
     %1:_(s32) = COPY $w1
     %3:_(s64) = G_CONSTANT i64 16
@@ -119,20 +121,21 @@ body:             |
 
     ; CHECK-LABEL: name: test_zero_const
     ; CHECK: liveins: $w1, $x0
-    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>) into %ir.dst, align 1)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into %ir.dst + 16, align 1)
-    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into %ir.dst + 32, align 1)
-    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-    ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD2]](p0) :: (store (<2 x s64>) into %ir.dst + 48, align 1)
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>) into %ir.dst, align 1)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into %ir.dst + 16, align 1)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into %ir.dst + 32, align 1)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+    ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD2]](p0) :: (store (<2 x s64>) into %ir.dst + 48, align 1)
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(p0) = COPY $x0
     %1:_(s32) = G_CONSTANT i32 0
     %3:_(s64) = G_CONSTANT i64 64
@@ -152,13 +155,14 @@ body:             |
 
     ; CHECK-LABEL: name: test_ms3_const_both
     ; CHECK: liveins: $x0
-    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4629771061636907072
-    ; CHECK: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CHECK: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1)
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4629771061636907072
+    ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1)
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(p0) = COPY $x0
     %1:_(s8) = G_CONSTANT i8 64
     %2:_(s64) = G_CONSTANT i64 16
@@ -176,24 +180,26 @@ body:             |
 
     ; CHECK-LABEL: name: test_ms_vector
     ; CHECK: liveins: $w1, $x0
-    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s8)
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673
-    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MUL]](s64), [[MUL]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>) into %ir.dst, align 1)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into %ir.dst + 16, align 1)
-    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into %ir.dst + 32, align 1)
-    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-    ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD2]](p0) :: (store (<2 x s64>) into %ir.dst + 44, align 1)
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s8)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT]](s64)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
+    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: G_STORE [[MUL]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>) into %ir.dst, align 1)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CHECK-NEXT: G_STORE [[MUL]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into %ir.dst + 16, align 1)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK-NEXT: G_STORE [[MUL]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into %ir.dst + 32, align 1)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+    ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CHECK-NEXT: G_STORE [[MUL]](<2 x s64>), [[PTR_ADD2]](p0) :: (store (<2 x s64>) into %ir.dst + 44, align 1)
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(p0) = COPY $x0
     %1:_(s32) = COPY $w1
     %3:_(s64) = G_CONSTANT i64 60
@@ -212,17 +218,18 @@ body:             |
 
     ; CHECK-LABEL: name: test_ms4_const_both_unaligned
     ; CHECK: liveins: $x0
-    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4629771061636907072
-    ; CHECK: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CHECK: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64)
-    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CHECK: G_STORE [[TRUNC]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.dst + 16, align 1)
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4629771061636907072
+    ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.dst + 16, align 1)
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(p0) = COPY $x0
     %1:_(s8) = G_CONSTANT i8 64
     %2:_(s64) = G_CONSTANT i64 18
@@ -239,17 +246,18 @@ body:             |
     liveins: $w1, $x0
     ; CHECK-LABEL: name: minsize
     ; CHECK: liveins: $w1, $x0
-    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s8)
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673
-    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
-    ; CHECK: G_STORE [[MUL]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CHECK: G_STORE [[MUL]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1)
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s8)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673
+    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
+    ; CHECK-NEXT: G_STORE [[MUL]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CHECK-NEXT: G_STORE [[MUL]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1)
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(p0) = COPY $x0
     %1:_(s32) = COPY $w1
     %3:_(s64) = G_CONSTANT i64 16
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
index 1061f0003bd4896..29a4992cd01df26 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
@@ -308,96 +308,51 @@ define i32 @v_sdiv_i32_pow2k_denom(i32 %num) {
 }
 
 define <2 x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) {
-; GISEL-LABEL: v_sdiv_v2i32_pow2k_denom:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; GISEL-NEXT:    v_mov_b32_e32 v3, 0x1000
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
-; GISEL-NEXT:    v_mov_b32_e32 v5, 0xfffff000
-; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
-; GISEL-NEXT:    v_lshlrev_b32_e32 v7, 12, v5
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v5
-; GISEL-NEXT:    v_lshlrev_b32_e32 v9, 12, v4
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, 1, v4
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v0, v3
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
-; GISEL-NEXT:    v_subrev_i32_e32 v8, vcc, 0x1000, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v7, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[6:7]
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v5, v7, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v8, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_sdiv_v2i32_pow2k_denom:
-; CGP:       ; %bb.0:
-; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, 0x45800000
-; CGP-NEXT:    v_mov_b32_e32 v4, 0xfffff000
-; CGP-NEXT:    v_mov_b32_e32 v5, 0x1000
-; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
-; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
-; CGP-NEXT:    v_mul_lo_u32 v4, v3, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v0, v3
-; CGP-NEXT:    v_mul_hi_u32 v3, v1, v3
-; CGP-NEXT:    v_lshlrev_b32_e32 v7, 12, v4
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
-; CGP-NEXT:    v_lshlrev_b32_e32 v9, 12, v3
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v3
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v8, s[4:5]
-; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v0, v5
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v1, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v3, v3, v10, s[6:7]
-; CGP-NEXT:    v_subrev_i32_e32 v8, vcc, 0x1000, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v0, v0, v7, s[4:5]
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[6:7]
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v3
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v7, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
-; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
-; CGP-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-LABEL: v_sdiv_v2i32_pow2k_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0x1000
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
+; CHECK-NEXT:    v_mov_b32_e32 v5, 0xfffff000
+; CHECK-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CHECK-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v6
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CHECK-NEXT:    v_mul_lo_u32 v5, v4, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v4, v5
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v0, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v4
+; CHECK-NEXT:    v_lshlrev_b32_e32 v7, 12, v5
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v5
+; CHECK-NEXT:    v_lshlrev_b32_e32 v9, 12, v4
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, 1, v4
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[4:5]
+; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v0, v3
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[6:7], v1, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
+; CHECK-NEXT:    v_subrev_i32_e32 v8, vcc, 0x1000, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v0, v7, s[4:5]
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[6:7]
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v5, v7, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v4, v8, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v6
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = sdiv <2 x i32> %num, <i32 4096, i32 4096>
   ret <2 x i32> %result
 }
@@ -436,96 +391,51 @@ define i32 @v_sdiv_i32_oddk_denom(i32 %num) {
 }
 
 define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) {
-; GISEL-LABEL: v_sdiv_v2i32_oddk_denom:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; GISEL-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
-; GISEL-NEXT:    v_mov_b32_e32 v5, 0xffed2705
-; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
-; GISEL-NEXT:    v_mul_lo_u32 v7, v5, v3
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v5
-; GISEL-NEXT:    v_mul_lo_u32 v9, v4, v3
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, 1, v4
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v0, v3
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
-; GISEL-NEXT:    v_subrev_i32_e32 v8, vcc, 0x12d8fb, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v7, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[6:7]
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v5, v7, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v8, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_sdiv_v2i32_oddk_denom:
-; CGP:       ; %bb.0:
-; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, 0x4996c7d8
-; CGP-NEXT:    v_mov_b32_e32 v4, 0xffed2705
-; CGP-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
-; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
-; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
-; CGP-NEXT:    v_mul_lo_u32 v4, v3, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v0, v3
-; CGP-NEXT:    v_mul_hi_u32 v3, v1, v3
-; CGP-NEXT:    v_mul_lo_u32 v7, v4, v5
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
-; CGP-NEXT:    v_mul_lo_u32 v9, v3, v5
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, 1, v3
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v4, v4, v8, s[4:5]
-; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v0, v5
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v1, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v3, v3, v10, s[6:7]
-; CGP-NEXT:    v_subrev_i32_e32 v8, vcc, 0x12d8fb, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v0, v0, v7, s[4:5]
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[6:7]
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v3
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v7, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
-; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
-; CGP-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-LABEL: v_sdiv_v2i32_oddk_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
+; CHECK-NEXT:    v_mov_b32_e32 v5, 0xffed2705
+; CHECK-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CHECK-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v6
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CHECK-NEXT:    v_mul_lo_u32 v5, v4, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v4, v5
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v0, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v4
+; CHECK-NEXT:    v_mul_lo_u32 v7, v5, v3
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v5
+; CHECK-NEXT:    v_mul_lo_u32 v9, v4, v3
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, 1, v4
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[4:5]
+; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v0, v3
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[6:7], v1, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
+; CHECK-NEXT:    v_subrev_i32_e32 v8, vcc, 0x12d8fb, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v0, v7, s[4:5]
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[6:7]
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v5, v7, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v4, v8, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v6
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = sdiv <2 x i32> %num, <i32 1235195, i32 1235195>
   ret <2 x i32> %result
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 3eb6f1eced0957f..e204894707e7bff 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -1125,477 +1125,245 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) {
 }
 
 define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
-; GISEL-LABEL: v_sdiv_v2i64_pow2k_denom:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
-; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
-; GISEL-NEXT:    s_sub_u32 s6, 0, 0x1000
-; GISEL-NEXT:    s_subb_u32 s7, 0, 0
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; GISEL-NEXT:    v_trunc_f32_e32 v7, v5
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v4
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], s6, v6, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v5, v7, v4
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
-; GISEL-NEXT:    v_mul_hi_u32 v9, v6, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v7, v4
-; GISEL-NEXT:    v_mul_lo_u32 v10, v6, v8
-; GISEL-NEXT:    v_mul_lo_u32 v11, v7, v8
-; GISEL-NEXT:    v_mul_hi_u32 v12, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v6, v4
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
-; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v7, v5, vcc
-; GISEL-NEXT:    v_mov_b32_e32 v4, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s6, v5, v[4:5]
-; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
-; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v10, v0, v4
-; GISEL-NEXT:    v_mul_lo_u32 v0, v5, v8
-; GISEL-NEXT:    v_mul_lo_u32 v12, v11, v9
-; GISEL-NEXT:    v_xor_b32_e32 v13, v1, v4
-; GISEL-NEXT:    v_mul_hi_u32 v1, v11, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v5, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v12, v0
-; GISEL-NEXT:    v_mul_hi_u32 v12, v11, v9
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v5, v1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v8, v13, v0
-; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v1
-; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT:    v_mov_b32_e32 v5, 0x1000
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v11, v13, v1
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v0, v8
-; GISEL-NEXT:    v_mul_hi_u32 v12, v13, v1
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v5, v12, v[1:2]
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
-; GISEL-NEXT:    s_sub_u32 s6, 0, 0x1000
-; GISEL-NEXT:    s_subb_u32 s7, 0, 0
-; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], v13, v8, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], v13, v8
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v1
-; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v8, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v0, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, -1, v9, s[4:5]
-; GISEL-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v11
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
-; GISEL-NEXT:    v_addc_u32_e32 v14, vcc, 0, v12, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
-; GISEL-NEXT:    v_cndmask_b32_e32 v15, -1, v8, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v13
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
-; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v14, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v15
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v13, v1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v7, v0
-; GISEL-NEXT:    v_mul_lo_u32 v13, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v0
-; GISEL-NEXT:    v_cndmask_b32_e32 v14, v14, v16, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v0, v7, v0
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT:    v_mul_hi_u32 v13, v6, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
-; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v6, v0
-; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v7, v1, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v11, v9, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT:    v_xor_b32_e32 v1, v9, v4
-; GISEL-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e32 v10, v12, v14, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v11, v2, v9
-; GISEL-NEXT:    v_mul_lo_u32 v2, v13, v0
-; GISEL-NEXT:    v_mul_lo_u32 v7, v8, v6
-; GISEL-NEXT:    v_xor_b32_e32 v12, v3, v9
-; GISEL-NEXT:    v_mul_hi_u32 v3, v8, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v13, v6
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
-; GISEL-NEXT:    v_mul_hi_u32 v7, v8, v6
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_mul_hi_u32 v6, v13, v6
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
-; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v13, v2, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v12, v0
-; GISEL-NEXT:    v_mul_lo_u32 v6, v11, v2
-; GISEL-NEXT:    v_mul_hi_u32 v7, v11, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v12, v0
-; GISEL-NEXT:    v_xor_b32_e32 v8, v10, v4
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v7, v12, v2
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
-; GISEL-NEXT:    v_mul_hi_u32 v6, v11, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v0, v3
-; GISEL-NEXT:    v_mul_hi_u32 v7, v12, v2
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v7, v0
-; GISEL-NEXT:    v_mov_b32_e32 v0, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v5, v13, v[0:1]
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
-; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v8, v4, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v11, v2
-; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v12, v3
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v5
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, -1, v6, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v10
-; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v13, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, -1, v2, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v6
-; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v7, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v6, v3, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v7, v5, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v13, v3, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v9
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v9
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_sdiv_v2i64_pow2k_denom:
-; CGP:       ; %bb.0:
-; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
-; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
-; CGP-NEXT:    v_mov_b32_e32 v6, 0xfffff000
-; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; CGP-NEXT:    v_trunc_f32_e32 v7, v5
-; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
-; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v4
-; CGP-NEXT:    v_cvt_u32_f32_e32 v9, v7
-; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0
-; CGP-NEXT:    v_mov_b32_e32 v7, v5
-; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8]
-; CGP-NEXT:    v_mul_hi_u32 v12, v9, v4
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], -1, v8, v[10:11]
-; CGP-NEXT:    v_mul_lo_u32 v10, v9, v4
-; CGP-NEXT:    v_mul_hi_u32 v11, v8, v4
-; CGP-NEXT:    v_mul_lo_u32 v4, v8, v13
-; CGP-NEXT:    v_mul_lo_u32 v7, v9, v13
-; CGP-NEXT:    v_mul_hi_u32 v14, v8, v13
-; CGP-NEXT:    v_mul_hi_u32 v13, v9, v13
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v14, v7
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
-; CGP-NEXT:    v_add_i32_e32 v16, vcc, v8, v4
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
-; CGP-NEXT:    v_addc_u32_e32 v17, vcc, v9, v7, vcc
-; CGP-NEXT:    v_mov_b32_e32 v4, v14
-; CGP-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
-; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
-; CGP-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15]
-; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v7, vcc
-; CGP-NEXT:    v_xor_b32_e32 v15, v0, v7
-; CGP-NEXT:    v_mul_lo_u32 v0, v17, v13
-; CGP-NEXT:    v_mul_lo_u32 v4, v16, v14
-; CGP-NEXT:    v_xor_b32_e32 v18, v1, v7
-; CGP-NEXT:    v_mul_hi_u32 v1, v16, v13
-; CGP-NEXT:    v_mul_hi_u32 v13, v17, v13
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v1, v17, v14
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
-; CGP-NEXT:    v_mul_hi_u32 v4, v16, v14
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
-; CGP-NEXT:    v_mul_hi_u32 v13, v17, v14
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
-; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v17, v1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v13, v18, v0
-; CGP-NEXT:    v_mul_lo_u32 v14, v15, v1
-; CGP-NEXT:    v_mul_hi_u32 v16, v15, v0
-; CGP-NEXT:    v_mul_hi_u32 v0, v18, v0
-; CGP-NEXT:    v_mov_b32_e32 v4, 0x1000
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v16, v18, v1
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_mul_hi_u32 v14, v15, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v16, v14
-; CGP-NEXT:    v_add_i32_e32 v16, vcc, v0, v13
-; CGP-NEXT:    v_mul_hi_u32 v17, v18, v1
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v16, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_add_i32_e32 v17, vcc, v17, v13
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v4, v17, v[1:2]
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v15, v0
-; CGP-NEXT:    v_subb_u32_e64 v1, s[4:5], v18, v13, vcc
-; CGP-NEXT:    v_sub_i32_e64 v13, s[4:5], v18, v13
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v1
-; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v13, vcc
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT:    v_subbrev_u32_e32 v13, vcc, 0, v1, vcc
-; CGP-NEXT:    v_add_i32_e32 v15, vcc, 1, v16
-; CGP-NEXT:    v_addc_u32_e32 v18, vcc, 0, v17, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT:    v_mov_b32_e32 v0, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v14, -1, v14, s[4:5]
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1]
-; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v13
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], -1, v8, v[0:1]
-; CGP-NEXT:    v_cndmask_b32_e32 v5, -1, v19, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, 1, v15
-; CGP-NEXT:    v_mul_lo_u32 v19, v8, v0
-; CGP-NEXT:    v_addc_u32_e32 v13, vcc, 0, v18, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v15, v1, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v13, v18, v13, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v19
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v11, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; CGP-NEXT:    v_mul_hi_u32 v10, v8, v0
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v1
-; CGP-NEXT:    v_addc_u32_e32 v9, vcc, v9, v0, vcc
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v14
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v16, v5, vcc
-; CGP-NEXT:    v_xor_b32_e32 v11, v5, v7
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2]
-; CGP-NEXT:    v_cndmask_b32_e32 v10, v17, v13, vcc
-; CGP-NEXT:    v_xor_b32_e32 v1, v10, v7
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], -1, v8, v[5:6]
-; CGP-NEXT:    v_ashrrev_i32_e32 v10, 31, v3
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
-; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v10, vcc
-; CGP-NEXT:    v_xor_b32_e32 v12, v2, v10
-; CGP-NEXT:    v_mul_lo_u32 v2, v9, v0
-; CGP-NEXT:    v_mul_lo_u32 v6, v8, v5
-; CGP-NEXT:    v_xor_b32_e32 v13, v3, v10
-; CGP-NEXT:    v_mul_hi_u32 v3, v8, v0
-; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v3, v9, v5
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
-; CGP-NEXT:    v_mul_hi_u32 v6, v8, v5
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; CGP-NEXT:    v_mul_hi_u32 v5, v9, v5
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v8, v0
-; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v9, v2, vcc
-; CGP-NEXT:    v_mul_lo_u32 v5, v13, v3
-; CGP-NEXT:    v_mul_lo_u32 v6, v12, v2
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v11, v7
-; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v7, vcc
-; CGP-NEXT:    v_mul_hi_u32 v7, v12, v3
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v7, v13, v2
-; CGP-NEXT:    v_mul_hi_u32 v3, v13, v3
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT:    v_mul_hi_u32 v6, v12, v2
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v3, v5
-; CGP-NEXT:    v_mul_hi_u32 v8, v13, v2
-; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v7, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v5
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v4, v8, v[3:4]
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v12, v2
-; CGP-NEXT:    v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
-; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v13, v5
-; CGP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
-; CGP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v3, -1, v6, s[4:5]
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v7
-; CGP-NEXT:    v_addc_u32_e32 v9, vcc, 0, v8, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v2, -1, v2, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v6
-; CGP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v9, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v6, v4, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v9, v5, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v8, v4, vcc
-; CGP-NEXT:    v_xor_b32_e32 v2, v2, v10
-; CGP-NEXT:    v_xor_b32_e32 v3, v3, v10
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
-; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v10, vcc
-; CGP-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-LABEL: v_sdiv_v2i64_pow2k_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
+; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
+; CHECK-NEXT:    s_sub_u32 s6, 0, 0x1000
+; CHECK-NEXT:    s_subb_u32 s7, 0, 0
+; CHECK-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CHECK-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CHECK-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; CHECK-NEXT:    v_trunc_f32_e32 v7, v5
+; CHECK-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v6, v4
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CHECK-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], s6, v6, 0
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[5:6]
+; CHECK-NEXT:    v_mul_lo_u32 v5, v7, v4
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
+; CHECK-NEXT:    v_mul_hi_u32 v9, v6, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v7, v4
+; CHECK-NEXT:    v_mul_lo_u32 v10, v6, v8
+; CHECK-NEXT:    v_mul_lo_u32 v11, v7, v8
+; CHECK-NEXT:    v_mul_hi_u32 v12, v6, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v7, v8
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v6, v4
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v7, v5, vcc
+; CHECK-NEXT:    v_mov_b32_e32 v4, v9
+; CHECK-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s6, v5, v[4:5]
+; CHECK-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v10, v0, v4
+; CHECK-NEXT:    v_mul_lo_u32 v0, v5, v8
+; CHECK-NEXT:    v_mul_lo_u32 v12, v11, v9
+; CHECK-NEXT:    v_xor_b32_e32 v13, v1, v4
+; CHECK-NEXT:    v_mul_hi_u32 v1, v11, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v5, v8
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v1, v5, v9
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v12, v0
+; CHECK-NEXT:    v_mul_hi_u32 v12, v11, v9
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; CHECK-NEXT:    v_mul_hi_u32 v9, v5, v9
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v5, v1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v8, v13, v0
+; CHECK-NEXT:    v_mul_lo_u32 v9, v10, v1
+; CHECK-NEXT:    v_mul_hi_u32 v11, v10, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, v13, v0
+; CHECK-NEXT:    v_mov_b32_e32 v5, 0x1000
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v11, v13, v1
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT:    v_mul_hi_u32 v9, v10, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v0, v8
+; CHECK-NEXT:    v_mul_hi_u32 v12, v13, v1
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v8
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v5, v12, v[1:2]
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v10, v0
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
+; CHECK-NEXT:    s_sub_u32 s6, 0, 0x1000
+; CHECK-NEXT:    s_subb_u32 s7, 0, 0
+; CHECK-NEXT:    v_subb_u32_e64 v1, s[4:5], v13, v8, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v8, s[4:5], v13, v8
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v1
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v8, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v8, vcc, v0, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, -1, v9, s[4:5]
+; CHECK-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, 1, v11
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
+; CHECK-NEXT:    v_addc_u32_e32 v14, vcc, 0, v12, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
+; CHECK-NEXT:    v_cndmask_b32_e32 v15, -1, v8, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, 1, v13
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
+; CHECK-NEXT:    v_addc_u32_e32 v16, vcc, 0, v14, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v15
+; CHECK-NEXT:    v_cndmask_b32_e32 v9, v13, v1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v1, v7, v0
+; CHECK-NEXT:    v_mul_lo_u32 v13, v6, v8
+; CHECK-NEXT:    v_mul_hi_u32 v15, v6, v0
+; CHECK-NEXT:    v_cndmask_b32_e32 v14, v14, v16, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v0, v7, v0
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v15
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v15, v7, v8
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
+; CHECK-NEXT:    v_mul_hi_u32 v13, v6, v8
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
+; CHECK-NEXT:    v_mul_hi_u32 v8, v7, v8
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v6, v0
+; CHECK-NEXT:    v_addc_u32_e32 v13, vcc, v7, v1, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; CHECK-NEXT:    v_cndmask_b32_e32 v9, v11, v9, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
+; CHECK-NEXT:    v_xor_b32_e32 v1, v9, v4
+; CHECK-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
+; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
+; CHECK-NEXT:    v_cndmask_b32_e32 v10, v12, v14, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v11, v2, v9
+; CHECK-NEXT:    v_mul_lo_u32 v2, v13, v0
+; CHECK-NEXT:    v_mul_lo_u32 v7, v8, v6
+; CHECK-NEXT:    v_xor_b32_e32 v12, v3, v9
+; CHECK-NEXT:    v_mul_hi_u32 v3, v8, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, v13, v0
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v3, v13, v6
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, v8, v6
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; CHECK-NEXT:    v_mul_hi_u32 v6, v13, v6
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v13, v2, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v3, v12, v0
+; CHECK-NEXT:    v_mul_lo_u32 v6, v11, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, v11, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, v12, v0
+; CHECK-NEXT:    v_xor_b32_e32 v8, v10, v4
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v7, v12, v2
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
+; CHECK-NEXT:    v_mul_hi_u32 v6, v11, v2
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v0, v3
+; CHECK-NEXT:    v_mul_hi_u32 v7, v12, v2
+; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v7, v0
+; CHECK-NEXT:    v_mov_b32_e32 v0, v3
+; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v5, v13, v[0:1]
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v8, v4, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v11, v2
+; CHECK-NEXT:    v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v12, v3
+; CHECK-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v5
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
+; CHECK-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, -1, v6, s[4:5]
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, 1, v10
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, 0, v13, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, -1, v2, vcc
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v6
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, 0, v7, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v6, v3, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v7, v5, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v13, v3, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v2, v2, v9
+; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v9
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
+; CHECK-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = sdiv <2 x i64> %num, <i64 4096, i64 4096>
   ret <2 x i64> %result
 }
@@ -1730,477 +1498,245 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
 }
 
 define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
-; GISEL-LABEL: v_sdiv_v2i64_oddk_denom:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
-; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
-; GISEL-NEXT:    s_sub_u32 s6, 0, 0x12d8fb
-; GISEL-NEXT:    s_subb_u32 s7, 0, 0
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; GISEL-NEXT:    v_trunc_f32_e32 v7, v5
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v4
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], s6, v6, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v5, v7, v4
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
-; GISEL-NEXT:    v_mul_hi_u32 v9, v6, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v7, v4
-; GISEL-NEXT:    v_mul_lo_u32 v10, v6, v8
-; GISEL-NEXT:    v_mul_lo_u32 v11, v7, v8
-; GISEL-NEXT:    v_mul_hi_u32 v12, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v6, v4
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
-; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v7, v5, vcc
-; GISEL-NEXT:    v_mov_b32_e32 v4, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s6, v5, v[4:5]
-; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
-; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v10, v0, v4
-; GISEL-NEXT:    v_mul_lo_u32 v0, v5, v8
-; GISEL-NEXT:    v_mul_lo_u32 v12, v11, v9
-; GISEL-NEXT:    v_xor_b32_e32 v13, v1, v4
-; GISEL-NEXT:    v_mul_hi_u32 v1, v11, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v5, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v12, v0
-; GISEL-NEXT:    v_mul_hi_u32 v12, v11, v9
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v5, v1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v8, v13, v0
-; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v1
-; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v11, v13, v1
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v0, v8
-; GISEL-NEXT:    v_mul_hi_u32 v12, v13, v1
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v5, v12, v[1:2]
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
-; GISEL-NEXT:    s_sub_u32 s6, 0, 0x12d8fb
-; GISEL-NEXT:    s_subb_u32 s7, 0, 0
-; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], v13, v8, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], v13, v8
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v1
-; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v8, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v0, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, -1, v9, s[4:5]
-; GISEL-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v11
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
-; GISEL-NEXT:    v_addc_u32_e32 v14, vcc, 0, v12, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
-; GISEL-NEXT:    v_cndmask_b32_e32 v15, -1, v8, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v13
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
-; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v14, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v15
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v13, v1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v7, v0
-; GISEL-NEXT:    v_mul_lo_u32 v13, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v0
-; GISEL-NEXT:    v_cndmask_b32_e32 v14, v14, v16, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v0, v7, v0
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT:    v_mul_hi_u32 v13, v6, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
-; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v6, v0
-; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v7, v1, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v11, v9, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT:    v_xor_b32_e32 v1, v9, v4
-; GISEL-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e32 v10, v12, v14, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v11, v2, v9
-; GISEL-NEXT:    v_mul_lo_u32 v2, v13, v0
-; GISEL-NEXT:    v_mul_lo_u32 v7, v8, v6
-; GISEL-NEXT:    v_xor_b32_e32 v12, v3, v9
-; GISEL-NEXT:    v_mul_hi_u32 v3, v8, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v13, v6
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
-; GISEL-NEXT:    v_mul_hi_u32 v7, v8, v6
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_mul_hi_u32 v6, v13, v6
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
-; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v13, v2, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v12, v0
-; GISEL-NEXT:    v_mul_lo_u32 v6, v11, v2
-; GISEL-NEXT:    v_mul_hi_u32 v7, v11, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v12, v0
-; GISEL-NEXT:    v_xor_b32_e32 v8, v10, v4
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v7, v12, v2
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
-; GISEL-NEXT:    v_mul_hi_u32 v6, v11, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v0, v3
-; GISEL-NEXT:    v_mul_hi_u32 v7, v12, v2
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v7, v0
-; GISEL-NEXT:    v_mov_b32_e32 v0, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v5, v13, v[0:1]
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
-; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v8, v4, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v11, v2
-; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v12, v3
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v5
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, -1, v6, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v10
-; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v13, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, -1, v2, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v6
-; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v7, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v6, v3, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v7, v5, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v13, v3, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v9
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v9
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_sdiv_v2i64_oddk_denom:
-; CGP:       ; %bb.0:
-; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
-; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
-; CGP-NEXT:    v_mov_b32_e32 v6, 0xffed2705
-; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; CGP-NEXT:    v_trunc_f32_e32 v7, v5
-; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
-; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v4
-; CGP-NEXT:    v_cvt_u32_f32_e32 v9, v7
-; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0
-; CGP-NEXT:    v_mov_b32_e32 v7, v5
-; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8]
-; CGP-NEXT:    v_mul_hi_u32 v12, v9, v4
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], -1, v8, v[10:11]
-; CGP-NEXT:    v_mul_lo_u32 v10, v9, v4
-; CGP-NEXT:    v_mul_hi_u32 v11, v8, v4
-; CGP-NEXT:    v_mul_lo_u32 v4, v8, v13
-; CGP-NEXT:    v_mul_lo_u32 v7, v9, v13
-; CGP-NEXT:    v_mul_hi_u32 v14, v8, v13
-; CGP-NEXT:    v_mul_hi_u32 v13, v9, v13
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v14, v7
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
-; CGP-NEXT:    v_add_i32_e32 v16, vcc, v8, v4
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
-; CGP-NEXT:    v_addc_u32_e32 v17, vcc, v9, v7, vcc
-; CGP-NEXT:    v_mov_b32_e32 v4, v14
-; CGP-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
-; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
-; CGP-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15]
-; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v7, vcc
-; CGP-NEXT:    v_xor_b32_e32 v15, v0, v7
-; CGP-NEXT:    v_mul_lo_u32 v0, v17, v13
-; CGP-NEXT:    v_mul_lo_u32 v4, v16, v14
-; CGP-NEXT:    v_xor_b32_e32 v18, v1, v7
-; CGP-NEXT:    v_mul_hi_u32 v1, v16, v13
-; CGP-NEXT:    v_mul_hi_u32 v13, v17, v13
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v1, v17, v14
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
-; CGP-NEXT:    v_mul_hi_u32 v4, v16, v14
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
-; CGP-NEXT:    v_mul_hi_u32 v13, v17, v14
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
-; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v17, v1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v13, v18, v0
-; CGP-NEXT:    v_mul_lo_u32 v14, v15, v1
-; CGP-NEXT:    v_mul_hi_u32 v16, v15, v0
-; CGP-NEXT:    v_mul_hi_u32 v0, v18, v0
-; CGP-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v16, v18, v1
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_mul_hi_u32 v14, v15, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v16, v14
-; CGP-NEXT:    v_add_i32_e32 v16, vcc, v0, v13
-; CGP-NEXT:    v_mul_hi_u32 v17, v18, v1
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v16, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_add_i32_e32 v17, vcc, v17, v13
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v4, v17, v[1:2]
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v15, v0
-; CGP-NEXT:    v_subb_u32_e64 v1, s[4:5], v18, v13, vcc
-; CGP-NEXT:    v_sub_i32_e64 v13, s[4:5], v18, v13
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v1
-; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v13, vcc
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT:    v_subbrev_u32_e32 v13, vcc, 0, v1, vcc
-; CGP-NEXT:    v_add_i32_e32 v15, vcc, 1, v16
-; CGP-NEXT:    v_addc_u32_e32 v18, vcc, 0, v17, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT:    v_mov_b32_e32 v0, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v14, -1, v14, s[4:5]
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1]
-; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v13
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], -1, v8, v[0:1]
-; CGP-NEXT:    v_cndmask_b32_e32 v5, -1, v19, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, 1, v15
-; CGP-NEXT:    v_mul_lo_u32 v19, v8, v0
-; CGP-NEXT:    v_addc_u32_e32 v13, vcc, 0, v18, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v15, v1, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v13, v18, v13, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v19
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v11, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; CGP-NEXT:    v_mul_hi_u32 v10, v8, v0
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v1
-; CGP-NEXT:    v_addc_u32_e32 v9, vcc, v9, v0, vcc
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v14
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v16, v5, vcc
-; CGP-NEXT:    v_xor_b32_e32 v11, v5, v7
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2]
-; CGP-NEXT:    v_cndmask_b32_e32 v10, v17, v13, vcc
-; CGP-NEXT:    v_xor_b32_e32 v1, v10, v7
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], -1, v8, v[5:6]
-; CGP-NEXT:    v_ashrrev_i32_e32 v10, 31, v3
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
-; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v10, vcc
-; CGP-NEXT:    v_xor_b32_e32 v12, v2, v10
-; CGP-NEXT:    v_mul_lo_u32 v2, v9, v0
-; CGP-NEXT:    v_mul_lo_u32 v6, v8, v5
-; CGP-NEXT:    v_xor_b32_e32 v13, v3, v10
-; CGP-NEXT:    v_mul_hi_u32 v3, v8, v0
-; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v3, v9, v5
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
-; CGP-NEXT:    v_mul_hi_u32 v6, v8, v5
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; CGP-NEXT:    v_mul_hi_u32 v5, v9, v5
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v8, v0
-; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v9, v2, vcc
-; CGP-NEXT:    v_mul_lo_u32 v5, v13, v3
-; CGP-NEXT:    v_mul_lo_u32 v6, v12, v2
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v11, v7
-; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v7, vcc
-; CGP-NEXT:    v_mul_hi_u32 v7, v12, v3
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v7, v13, v2
-; CGP-NEXT:    v_mul_hi_u32 v3, v13, v3
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT:    v_mul_hi_u32 v6, v12, v2
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v3, v5
-; CGP-NEXT:    v_mul_hi_u32 v8, v13, v2
-; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v7, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v5
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v4, v8, v[3:4]
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v12, v2
-; CGP-NEXT:    v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
-; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v13, v5
-; CGP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
-; CGP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v3, -1, v6, s[4:5]
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v7
-; CGP-NEXT:    v_addc_u32_e32 v9, vcc, 0, v8, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v2, -1, v2, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v6
-; CGP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v9, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v6, v4, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v9, v5, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v8, v4, vcc
-; CGP-NEXT:    v_xor_b32_e32 v2, v2, v10
-; CGP-NEXT:    v_xor_b32_e32 v3, v3, v10
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
-; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v10, vcc
-; CGP-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-LABEL: v_sdiv_v2i64_oddk_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
+; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
+; CHECK-NEXT:    s_sub_u32 s6, 0, 0x12d8fb
+; CHECK-NEXT:    s_subb_u32 s7, 0, 0
+; CHECK-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CHECK-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CHECK-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; CHECK-NEXT:    v_trunc_f32_e32 v7, v5
+; CHECK-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v6, v4
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CHECK-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], s6, v6, 0
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[5:6]
+; CHECK-NEXT:    v_mul_lo_u32 v5, v7, v4
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
+; CHECK-NEXT:    v_mul_hi_u32 v9, v6, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v7, v4
+; CHECK-NEXT:    v_mul_lo_u32 v10, v6, v8
+; CHECK-NEXT:    v_mul_lo_u32 v11, v7, v8
+; CHECK-NEXT:    v_mul_hi_u32 v12, v6, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v7, v8
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v6, v4
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v7, v5, vcc
+; CHECK-NEXT:    v_mov_b32_e32 v4, v9
+; CHECK-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s6, v5, v[4:5]
+; CHECK-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v10, v0, v4
+; CHECK-NEXT:    v_mul_lo_u32 v0, v5, v8
+; CHECK-NEXT:    v_mul_lo_u32 v12, v11, v9
+; CHECK-NEXT:    v_xor_b32_e32 v13, v1, v4
+; CHECK-NEXT:    v_mul_hi_u32 v1, v11, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v5, v8
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v1, v5, v9
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v12, v0
+; CHECK-NEXT:    v_mul_hi_u32 v12, v11, v9
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; CHECK-NEXT:    v_mul_hi_u32 v9, v5, v9
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v5, v1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v8, v13, v0
+; CHECK-NEXT:    v_mul_lo_u32 v9, v10, v1
+; CHECK-NEXT:    v_mul_hi_u32 v11, v10, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, v13, v0
+; CHECK-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v11, v13, v1
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT:    v_mul_hi_u32 v9, v10, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v0, v8
+; CHECK-NEXT:    v_mul_hi_u32 v12, v13, v1
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v8
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v5, v12, v[1:2]
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v10, v0
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
+; CHECK-NEXT:    s_sub_u32 s6, 0, 0x12d8fb
+; CHECK-NEXT:    s_subb_u32 s7, 0, 0
+; CHECK-NEXT:    v_subb_u32_e64 v1, s[4:5], v13, v8, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v8, s[4:5], v13, v8
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v1
+; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v8, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v8, vcc, v0, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, -1, v9, s[4:5]
+; CHECK-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, 1, v11
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
+; CHECK-NEXT:    v_addc_u32_e32 v14, vcc, 0, v12, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
+; CHECK-NEXT:    v_cndmask_b32_e32 v15, -1, v8, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, 1, v13
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
+; CHECK-NEXT:    v_addc_u32_e32 v16, vcc, 0, v14, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v15
+; CHECK-NEXT:    v_cndmask_b32_e32 v9, v13, v1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v1, v7, v0
+; CHECK-NEXT:    v_mul_lo_u32 v13, v6, v8
+; CHECK-NEXT:    v_mul_hi_u32 v15, v6, v0
+; CHECK-NEXT:    v_cndmask_b32_e32 v14, v14, v16, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v0, v7, v0
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v15
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v15, v7, v8
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
+; CHECK-NEXT:    v_mul_hi_u32 v13, v6, v8
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
+; CHECK-NEXT:    v_mul_hi_u32 v8, v7, v8
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v6, v0
+; CHECK-NEXT:    v_addc_u32_e32 v13, vcc, v7, v1, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
+; CHECK-NEXT:    v_cndmask_b32_e32 v9, v11, v9, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
+; CHECK-NEXT:    v_xor_b32_e32 v1, v9, v4
+; CHECK-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
+; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
+; CHECK-NEXT:    v_cndmask_b32_e32 v10, v12, v14, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v11, v2, v9
+; CHECK-NEXT:    v_mul_lo_u32 v2, v13, v0
+; CHECK-NEXT:    v_mul_lo_u32 v7, v8, v6
+; CHECK-NEXT:    v_xor_b32_e32 v12, v3, v9
+; CHECK-NEXT:    v_mul_hi_u32 v3, v8, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, v13, v0
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v3, v13, v6
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, v8, v6
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; CHECK-NEXT:    v_mul_hi_u32 v6, v13, v6
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v13, v2, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v3, v12, v0
+; CHECK-NEXT:    v_mul_lo_u32 v6, v11, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, v11, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, v12, v0
+; CHECK-NEXT:    v_xor_b32_e32 v8, v10, v4
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v7, v12, v2
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
+; CHECK-NEXT:    v_mul_hi_u32 v6, v11, v2
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v0, v3
+; CHECK-NEXT:    v_mul_hi_u32 v7, v12, v2
+; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v7, v0
+; CHECK-NEXT:    v_mov_b32_e32 v0, v3
+; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v5, v13, v[0:1]
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v8, v4, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v11, v2
+; CHECK-NEXT:    v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v12, v3
+; CHECK-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v5
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
+; CHECK-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v4, -1, v6, s[4:5]
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, 1, v10
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, 0, v13, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, -1, v2, vcc
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v6
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, 0, v7, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v6, v3, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v7, v5, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v13, v3, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v2, v2, v9
+; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v9
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
+; CHECK-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = sdiv <2 x i64> %num, <i64 1235195, i64 1235195>
   ret <2 x i64> %result
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
index 1bb606f36e48d2c..0d6366da09e05b6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
@@ -282,88 +282,47 @@ define i32 @v_srem_i32_pow2k_denom(i32 %num) {
 }
 
 define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
-; GISEL-LABEL: v_srem_v2i32_pow2k_denom:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; GISEL-NEXT:    v_mov_b32_e32 v3, 0x1000
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
-; GISEL-NEXT:    v_mov_b32_e32 v5, 0xfffff000
-; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
-; GISEL-NEXT:    v_lshlrev_b32_e32 v5, 12, v5
-; GISEL-NEXT:    v_lshlrev_b32_e32 v4, 12, v4
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
-; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
-; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, 0x1000, v1
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
-; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, 0x1000, v1
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_srem_v2i32_pow2k_denom:
-; CGP:       ; %bb.0:
-; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, 0x45800000
-; CGP-NEXT:    v_mov_b32_e32 v4, 0xfffff000
-; CGP-NEXT:    v_mov_b32_e32 v5, 0x1000
-; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
-; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
-; CGP-NEXT:    v_mul_lo_u32 v4, v3, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v0, v3
-; CGP-NEXT:    v_mul_hi_u32 v3, v1, v3
-; CGP-NEXT:    v_lshlrev_b32_e32 v4, 12, v4
-; CGP-NEXT:    v_lshlrev_b32_e32 v3, 12, v3
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v0, v5
-; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, 0x1000, v1
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v0, v5
-; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, 0x1000, v1
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
-; CGP-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-LABEL: v_srem_v2i32_pow2k_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0x1000
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
+; CHECK-NEXT:    v_mov_b32_e32 v5, 0xfffff000
+; CHECK-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CHECK-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v6
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CHECK-NEXT:    v_mul_lo_u32 v5, v4, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v4, v5
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v0, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v4
+; CHECK-NEXT:    v_lshlrev_b32_e32 v5, 12, v5
+; CHECK-NEXT:    v_lshlrev_b32_e32 v4, 12, v4
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
+; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
+; CHECK-NEXT:    v_subrev_i32_e32 v5, vcc, 0x1000, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
+; CHECK-NEXT:    v_subrev_i32_e32 v5, vcc, 0x1000, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v6
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = srem <2 x i32> %num, <i32 4096, i32 4096>
   ret <2 x i32> %result
 }
@@ -400,88 +359,47 @@ define i32 @v_srem_i32_oddk_denom(i32 %num) {
 }
 
 define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
-; GISEL-LABEL: v_srem_v2i32_oddk_denom:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; GISEL-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
-; GISEL-NEXT:    v_mov_b32_e32 v5, 0xffed2705
-; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
-; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v3
-; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v3
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
-; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
-; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, 0x12d8fb, v1
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
-; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, 0x12d8fb, v1
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_srem_v2i32_oddk_denom:
-; CGP:       ; %bb.0:
-; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, 0x4996c7d8
-; CGP-NEXT:    v_mov_b32_e32 v4, 0xffed2705
-; CGP-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
-; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
-; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
-; CGP-NEXT:    v_mul_lo_u32 v4, v3, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CGP-NEXT:    v_mul_hi_u32 v4, v0, v3
-; CGP-NEXT:    v_mul_hi_u32 v3, v1, v3
-; CGP-NEXT:    v_mul_lo_u32 v4, v4, v5
-; CGP-NEXT:    v_mul_lo_u32 v3, v3, v5
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v0, v5
-; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v0, v5
-; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
-; CGP-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-LABEL: v_srem_v2i32_oddk_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
+; CHECK-NEXT:    v_mov_b32_e32 v5, 0xffed2705
+; CHECK-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CHECK-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v6
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CHECK-NEXT:    v_mul_lo_u32 v5, v4, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v4, v5
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v0, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v4
+; CHECK-NEXT:    v_mul_lo_u32 v5, v5, v3
+; CHECK-NEXT:    v_mul_lo_u32 v4, v4, v3
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
+; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
+; CHECK-NEXT:    v_subrev_i32_e32 v5, vcc, 0x12d8fb, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
+; CHECK-NEXT:    v_subrev_i32_e32 v5, vcc, 0x12d8fb, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v6
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = srem <2 x i32> %num, <i32 1235195, i32 1235195>
   ret <2 x i32> %result
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index 0b22b3b3a4ba7c6..ee7dcbfb8896b4c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -1101,469 +1101,241 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) {
 }
 
 define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
-; GISEL-LABEL: v_srem_v2i64_pow2k_denom:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
-; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
-; GISEL-NEXT:    s_sub_u32 s6, 0, 0x1000
-; GISEL-NEXT:    s_subb_u32 s7, 0, 0
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; GISEL-NEXT:    v_trunc_f32_e32 v7, v5
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v4
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], s6, v6, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v5, v7, v4
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
-; GISEL-NEXT:    v_mul_hi_u32 v9, v6, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v7, v4
-; GISEL-NEXT:    v_mul_lo_u32 v10, v6, v8
-; GISEL-NEXT:    v_mul_lo_u32 v11, v7, v8
-; GISEL-NEXT:    v_mul_hi_u32 v12, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v6, v4
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
-; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v7, v5, vcc
-; GISEL-NEXT:    v_mov_b32_e32 v4, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s6, v5, v[4:5]
-; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
-; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v10, v0, v4
-; GISEL-NEXT:    v_mul_lo_u32 v0, v5, v8
-; GISEL-NEXT:    v_mul_lo_u32 v12, v11, v9
-; GISEL-NEXT:    v_xor_b32_e32 v13, v1, v4
-; GISEL-NEXT:    v_mul_hi_u32 v1, v11, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v5, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v12, v0
-; GISEL-NEXT:    v_mul_hi_u32 v12, v11, v9
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v5, v1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v8, v13, v0
-; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v1
-; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT:    v_mov_b32_e32 v5, 0x1000
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v11, v13, v1
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v0, v8
-; GISEL-NEXT:    v_mul_hi_u32 v12, v13, v1
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v5, v8, v[1:2]
-; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, v10, v0
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
-; GISEL-NEXT:    s_sub_u32 s6, 0, 0x1000
-; GISEL-NEXT:    s_subb_u32 s7, 0, 0
-; GISEL-NEXT:    v_subb_u32_e64 v11, s[4:5], v13, v8, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v13, v8
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v5
-; GISEL-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, v10, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, -1, v1, s[4:5]
-; GISEL-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v0, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v13, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v14
-; GISEL-NEXT:    v_cndmask_b32_e32 v15, -1, v8, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v13, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
-; GISEL-NEXT:    v_subbrev_u32_e32 v16, vcc, 0, v14, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v15
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v13, v1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v7, v0
-; GISEL-NEXT:    v_mul_lo_u32 v13, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v0
-; GISEL-NEXT:    v_cndmask_b32_e32 v14, v14, v16, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v0, v7, v0
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT:    v_mul_hi_u32 v13, v6, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
-; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v6, v0
-; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v7, v1, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v10, v9, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT:    v_xor_b32_e32 v1, v9, v4
-; GISEL-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e32 v10, v11, v14, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v11, v2, v9
-; GISEL-NEXT:    v_mul_lo_u32 v2, v13, v0
-; GISEL-NEXT:    v_mul_lo_u32 v7, v8, v6
-; GISEL-NEXT:    v_xor_b32_e32 v12, v3, v9
-; GISEL-NEXT:    v_mul_hi_u32 v3, v8, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v13, v6
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
-; GISEL-NEXT:    v_mul_hi_u32 v7, v8, v6
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_mul_hi_u32 v6, v13, v6
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
-; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v13, v2, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v12, v0
-; GISEL-NEXT:    v_mul_lo_u32 v6, v11, v2
-; GISEL-NEXT:    v_mul_hi_u32 v7, v11, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v12, v0
-; GISEL-NEXT:    v_xor_b32_e32 v8, v10, v4
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v7, v12, v2
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
-; GISEL-NEXT:    v_mul_hi_u32 v6, v11, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v0, v3
-; GISEL-NEXT:    v_mul_hi_u32 v7, v12, v2
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v0
-; GISEL-NEXT:    v_mov_b32_e32 v0, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[0:1]
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
-; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v8, v4, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v11, v2
-; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v12, v3
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v2, v5
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v5
-; GISEL-NEXT:    v_cndmask_b32_e32 v8, -1, v8, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
-; GISEL-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
-; GISEL-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v10, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v9
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v9
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_srem_v2i64_pow2k_denom:
-; CGP:       ; %bb.0:
-; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
-; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
-; CGP-NEXT:    v_mov_b32_e32 v6, 0xfffff000
-; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; CGP-NEXT:    v_trunc_f32_e32 v7, v5
-; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
-; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v4
-; CGP-NEXT:    v_cvt_u32_f32_e32 v9, v7
-; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0
-; CGP-NEXT:    v_mov_b32_e32 v7, v5
-; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8]
-; CGP-NEXT:    v_mul_hi_u32 v12, v9, v4
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], -1, v8, v[10:11]
-; CGP-NEXT:    v_mul_lo_u32 v10, v9, v4
-; CGP-NEXT:    v_mul_hi_u32 v11, v8, v4
-; CGP-NEXT:    v_mul_lo_u32 v4, v8, v13
-; CGP-NEXT:    v_mul_lo_u32 v7, v9, v13
-; CGP-NEXT:    v_mul_hi_u32 v14, v8, v13
-; CGP-NEXT:    v_mul_hi_u32 v13, v9, v13
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v14, v7
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
-; CGP-NEXT:    v_add_i32_e32 v16, vcc, v8, v4
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
-; CGP-NEXT:    v_addc_u32_e32 v17, vcc, v9, v7, vcc
-; CGP-NEXT:    v_mov_b32_e32 v4, v14
-; CGP-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
-; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
-; CGP-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15]
-; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v7, vcc
-; CGP-NEXT:    v_xor_b32_e32 v15, v0, v7
-; CGP-NEXT:    v_mul_lo_u32 v0, v17, v13
-; CGP-NEXT:    v_mul_lo_u32 v4, v16, v14
-; CGP-NEXT:    v_xor_b32_e32 v18, v1, v7
-; CGP-NEXT:    v_mul_hi_u32 v1, v16, v13
-; CGP-NEXT:    v_mul_hi_u32 v13, v17, v13
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v1, v17, v14
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
-; CGP-NEXT:    v_mul_hi_u32 v4, v16, v14
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
-; CGP-NEXT:    v_mul_hi_u32 v13, v17, v14
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
-; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v17, v1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v13, v18, v0
-; CGP-NEXT:    v_mul_lo_u32 v14, v15, v1
-; CGP-NEXT:    v_mul_hi_u32 v16, v15, v0
-; CGP-NEXT:    v_mul_hi_u32 v0, v18, v0
-; CGP-NEXT:    v_mov_b32_e32 v4, 0x1000
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v16, v18, v1
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_mul_hi_u32 v14, v15, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v16, v14
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
-; CGP-NEXT:    v_mul_hi_u32 v16, v18, v1
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v16, v13
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2]
-; CGP-NEXT:    v_sub_i32_e32 v14, vcc, v15, v0
-; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v18, v13
-; CGP-NEXT:    v_subb_u32_e64 v15, s[4:5], v18, v13, vcc
-; CGP-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v14, v4
-; CGP-NEXT:    v_sub_i32_e32 v16, vcc, v14, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v15
-; CGP-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v0, vcc
-; CGP-NEXT:    v_mov_b32_e32 v0, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v13, -1, v1, s[4:5]
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1]
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v16, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, -1, vcc
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], -1, v8, v[0:1]
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v17
-; CGP-NEXT:    v_cndmask_b32_e32 v5, -1, v18, vcc
-; CGP-NEXT:    v_mul_lo_u32 v19, v8, v0
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v16, v4
-; CGP-NEXT:    v_subbrev_u32_e32 v18, vcc, 0, v17, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v16, v1, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v16, v17, v18, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v19
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v11, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; CGP-NEXT:    v_mul_hi_u32 v10, v8, v0
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v1
-; CGP-NEXT:    v_addc_u32_e32 v9, vcc, v9, v0, vcc
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v14, v5, vcc
-; CGP-NEXT:    v_xor_b32_e32 v11, v5, v7
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2]
-; CGP-NEXT:    v_cndmask_b32_e32 v10, v15, v16, vcc
-; CGP-NEXT:    v_xor_b32_e32 v1, v10, v7
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], -1, v8, v[5:6]
-; CGP-NEXT:    v_ashrrev_i32_e32 v10, 31, v3
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
-; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v10, vcc
-; CGP-NEXT:    v_xor_b32_e32 v12, v2, v10
-; CGP-NEXT:    v_mul_lo_u32 v2, v9, v0
-; CGP-NEXT:    v_mul_lo_u32 v6, v8, v5
-; CGP-NEXT:    v_xor_b32_e32 v13, v3, v10
-; CGP-NEXT:    v_mul_hi_u32 v3, v8, v0
-; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v3, v9, v5
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
-; CGP-NEXT:    v_mul_hi_u32 v6, v8, v5
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; CGP-NEXT:    v_mul_hi_u32 v5, v9, v5
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v8, v0
-; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v9, v2, vcc
-; CGP-NEXT:    v_mul_lo_u32 v5, v13, v3
-; CGP-NEXT:    v_mul_lo_u32 v6, v12, v2
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v11, v7
-; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v7, vcc
-; CGP-NEXT:    v_mul_hi_u32 v7, v12, v3
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v7, v13, v2
-; CGP-NEXT:    v_mul_hi_u32 v3, v13, v3
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT:    v_mul_hi_u32 v6, v12, v2
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; CGP-NEXT:    v_mul_hi_u32 v7, v13, v2
-; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4]
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v12, v2
-; CGP-NEXT:    v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
-; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v13, v5
-; CGP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v2, v4
-; CGP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v8, -1, v8, vcc
-; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v7, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
-; CGP-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v5, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
-; CGP-NEXT:    v_xor_b32_e32 v2, v2, v10
-; CGP-NEXT:    v_xor_b32_e32 v3, v3, v10
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
-; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v10, vcc
-; CGP-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-LABEL: v_srem_v2i64_pow2k_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
+; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
+; CHECK-NEXT:    s_sub_u32 s6, 0, 0x1000
+; CHECK-NEXT:    s_subb_u32 s7, 0, 0
+; CHECK-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CHECK-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CHECK-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; CHECK-NEXT:    v_trunc_f32_e32 v7, v5
+; CHECK-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v6, v4
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CHECK-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], s6, v6, 0
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[5:6]
+; CHECK-NEXT:    v_mul_lo_u32 v5, v7, v4
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
+; CHECK-NEXT:    v_mul_hi_u32 v9, v6, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v7, v4
+; CHECK-NEXT:    v_mul_lo_u32 v10, v6, v8
+; CHECK-NEXT:    v_mul_lo_u32 v11, v7, v8
+; CHECK-NEXT:    v_mul_hi_u32 v12, v6, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v7, v8
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v6, v4
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v7, v5, vcc
+; CHECK-NEXT:    v_mov_b32_e32 v4, v9
+; CHECK-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s6, v5, v[4:5]
+; CHECK-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v10, v0, v4
+; CHECK-NEXT:    v_mul_lo_u32 v0, v5, v8
+; CHECK-NEXT:    v_mul_lo_u32 v12, v11, v9
+; CHECK-NEXT:    v_xor_b32_e32 v13, v1, v4
+; CHECK-NEXT:    v_mul_hi_u32 v1, v11, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v5, v8
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v1, v5, v9
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v12, v0
+; CHECK-NEXT:    v_mul_hi_u32 v12, v11, v9
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; CHECK-NEXT:    v_mul_hi_u32 v9, v5, v9
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v5, v1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v8, v13, v0
+; CHECK-NEXT:    v_mul_lo_u32 v9, v10, v1
+; CHECK-NEXT:    v_mul_hi_u32 v11, v10, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, v13, v0
+; CHECK-NEXT:    v_mov_b32_e32 v5, 0x1000
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v11, v13, v1
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT:    v_mul_hi_u32 v9, v10, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v0, v8
+; CHECK-NEXT:    v_mul_hi_u32 v12, v13, v1
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v5, v8, v[1:2]
+; CHECK-NEXT:    v_sub_i32_e32 v10, vcc, v10, v0
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
+; CHECK-NEXT:    s_sub_u32 s6, 0, 0x1000
+; CHECK-NEXT:    s_subb_u32 s7, 0, 0
+; CHECK-NEXT:    v_subb_u32_e64 v11, s[4:5], v13, v8, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v13, v8
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v5
+; CHECK-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
+; CHECK-NEXT:    v_sub_i32_e32 v13, vcc, v10, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, -1, v1, s[4:5]
+; CHECK-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v0, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v13, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v14
+; CHECK-NEXT:    v_cndmask_b32_e32 v15, -1, v8, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v13, v5
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
+; CHECK-NEXT:    v_subbrev_u32_e32 v16, vcc, 0, v14, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v15
+; CHECK-NEXT:    v_cndmask_b32_e32 v9, v13, v1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v1, v7, v0
+; CHECK-NEXT:    v_mul_lo_u32 v13, v6, v8
+; CHECK-NEXT:    v_mul_hi_u32 v15, v6, v0
+; CHECK-NEXT:    v_cndmask_b32_e32 v14, v14, v16, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v0, v7, v0
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v15
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v15, v7, v8
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
+; CHECK-NEXT:    v_mul_hi_u32 v13, v6, v8
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
+; CHECK-NEXT:    v_mul_hi_u32 v8, v7, v8
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v6, v0
+; CHECK-NEXT:    v_addc_u32_e32 v13, vcc, v7, v1, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
+; CHECK-NEXT:    v_cndmask_b32_e32 v9, v10, v9, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
+; CHECK-NEXT:    v_xor_b32_e32 v1, v9, v4
+; CHECK-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
+; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
+; CHECK-NEXT:    v_cndmask_b32_e32 v10, v11, v14, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v11, v2, v9
+; CHECK-NEXT:    v_mul_lo_u32 v2, v13, v0
+; CHECK-NEXT:    v_mul_lo_u32 v7, v8, v6
+; CHECK-NEXT:    v_xor_b32_e32 v12, v3, v9
+; CHECK-NEXT:    v_mul_hi_u32 v3, v8, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, v13, v0
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v3, v13, v6
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, v8, v6
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; CHECK-NEXT:    v_mul_hi_u32 v6, v13, v6
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v13, v2, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v3, v12, v0
+; CHECK-NEXT:    v_mul_lo_u32 v6, v11, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, v11, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, v12, v0
+; CHECK-NEXT:    v_xor_b32_e32 v8, v10, v4
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v7, v12, v2
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
+; CHECK-NEXT:    v_mul_hi_u32 v6, v11, v2
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v0, v3
+; CHECK-NEXT:    v_mul_hi_u32 v7, v12, v2
+; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v0
+; CHECK-NEXT:    v_mov_b32_e32 v0, v3
+; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[0:1]
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v8, v4, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v11, v2
+; CHECK-NEXT:    v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v12, v3
+; CHECK-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v2, v5
+; CHECK-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v8, -1, v8, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, v7, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
+; CHECK-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; CHECK-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v10, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v2, v2, v9
+; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v9
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
+; CHECK-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = srem <2 x i64> %num, <i64 4096, i64 4096>
   ret <2 x i64> %result
 }
@@ -1696,469 +1468,241 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) {
 }
 
 define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
-; GISEL-LABEL: v_srem_v2i64_oddk_denom:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
-; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
-; GISEL-NEXT:    s_sub_u32 s6, 0, 0x12d8fb
-; GISEL-NEXT:    s_subb_u32 s7, 0, 0
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; GISEL-NEXT:    v_trunc_f32_e32 v7, v5
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v4
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], s6, v6, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v5, v7, v4
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
-; GISEL-NEXT:    v_mul_hi_u32 v9, v6, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v7, v4
-; GISEL-NEXT:    v_mul_lo_u32 v10, v6, v8
-; GISEL-NEXT:    v_mul_lo_u32 v11, v7, v8
-; GISEL-NEXT:    v_mul_hi_u32 v12, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v6, v4
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
-; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v7, v5, vcc
-; GISEL-NEXT:    v_mov_b32_e32 v4, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s6, v5, v[4:5]
-; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
-; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v10, v0, v4
-; GISEL-NEXT:    v_mul_lo_u32 v0, v5, v8
-; GISEL-NEXT:    v_mul_lo_u32 v12, v11, v9
-; GISEL-NEXT:    v_xor_b32_e32 v13, v1, v4
-; GISEL-NEXT:    v_mul_hi_u32 v1, v11, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v5, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v12, v0
-; GISEL-NEXT:    v_mul_hi_u32 v12, v11, v9
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v5, v1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v8, v13, v0
-; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v1
-; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v11, v13, v1
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v0, v8
-; GISEL-NEXT:    v_mul_hi_u32 v12, v13, v1
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v5, v8, v[1:2]
-; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, v10, v0
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
-; GISEL-NEXT:    s_sub_u32 s6, 0, 0x12d8fb
-; GISEL-NEXT:    s_subb_u32 s7, 0, 0
-; GISEL-NEXT:    v_subb_u32_e64 v11, s[4:5], v13, v8, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v13, v8
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v5
-; GISEL-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, v10, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, -1, v1, s[4:5]
-; GISEL-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v0, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v13, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v14
-; GISEL-NEXT:    v_cndmask_b32_e32 v15, -1, v8, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v13, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
-; GISEL-NEXT:    v_subbrev_u32_e32 v16, vcc, 0, v14, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v15
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v13, v1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v7, v0
-; GISEL-NEXT:    v_mul_lo_u32 v13, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v0
-; GISEL-NEXT:    v_cndmask_b32_e32 v14, v14, v16, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v0, v7, v0
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT:    v_mul_hi_u32 v13, v6, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
-; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v6, v0
-; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v7, v1, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v10, v9, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT:    v_xor_b32_e32 v1, v9, v4
-; GISEL-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e32 v10, v11, v14, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v11, v2, v9
-; GISEL-NEXT:    v_mul_lo_u32 v2, v13, v0
-; GISEL-NEXT:    v_mul_lo_u32 v7, v8, v6
-; GISEL-NEXT:    v_xor_b32_e32 v12, v3, v9
-; GISEL-NEXT:    v_mul_hi_u32 v3, v8, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v13, v6
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
-; GISEL-NEXT:    v_mul_hi_u32 v7, v8, v6
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_mul_hi_u32 v6, v13, v6
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
-; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v13, v2, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v12, v0
-; GISEL-NEXT:    v_mul_lo_u32 v6, v11, v2
-; GISEL-NEXT:    v_mul_hi_u32 v7, v11, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v12, v0
-; GISEL-NEXT:    v_xor_b32_e32 v8, v10, v4
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v7, v12, v2
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
-; GISEL-NEXT:    v_mul_hi_u32 v6, v11, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v0, v3
-; GISEL-NEXT:    v_mul_hi_u32 v7, v12, v2
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v0
-; GISEL-NEXT:    v_mov_b32_e32 v0, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[0:1]
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
-; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v8, v4, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v11, v2
-; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v12, v3
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v2, v5
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v5
-; GISEL-NEXT:    v_cndmask_b32_e32 v8, -1, v8, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
-; GISEL-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
-; GISEL-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v10, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v9
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v9
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
-; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_srem_v2i64_oddk_denom:
-; CGP:       ; %bb.0:
-; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
-; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
-; CGP-NEXT:    v_mov_b32_e32 v6, 0xffed2705
-; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; CGP-NEXT:    v_trunc_f32_e32 v7, v5
-; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
-; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v4
-; CGP-NEXT:    v_cvt_u32_f32_e32 v9, v7
-; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0
-; CGP-NEXT:    v_mov_b32_e32 v7, v5
-; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8]
-; CGP-NEXT:    v_mul_hi_u32 v12, v9, v4
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], -1, v8, v[10:11]
-; CGP-NEXT:    v_mul_lo_u32 v10, v9, v4
-; CGP-NEXT:    v_mul_hi_u32 v11, v8, v4
-; CGP-NEXT:    v_mul_lo_u32 v4, v8, v13
-; CGP-NEXT:    v_mul_lo_u32 v7, v9, v13
-; CGP-NEXT:    v_mul_hi_u32 v14, v8, v13
-; CGP-NEXT:    v_mul_hi_u32 v13, v9, v13
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v14, v7
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
-; CGP-NEXT:    v_add_i32_e32 v16, vcc, v8, v4
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
-; CGP-NEXT:    v_addc_u32_e32 v17, vcc, v9, v7, vcc
-; CGP-NEXT:    v_mov_b32_e32 v4, v14
-; CGP-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
-; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
-; CGP-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15]
-; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v7, vcc
-; CGP-NEXT:    v_xor_b32_e32 v15, v0, v7
-; CGP-NEXT:    v_mul_lo_u32 v0, v17, v13
-; CGP-NEXT:    v_mul_lo_u32 v4, v16, v14
-; CGP-NEXT:    v_xor_b32_e32 v18, v1, v7
-; CGP-NEXT:    v_mul_hi_u32 v1, v16, v13
-; CGP-NEXT:    v_mul_hi_u32 v13, v17, v13
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v1, v17, v14
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
-; CGP-NEXT:    v_mul_hi_u32 v4, v16, v14
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
-; CGP-NEXT:    v_mul_hi_u32 v13, v17, v14
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
-; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v17, v1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v13, v18, v0
-; CGP-NEXT:    v_mul_lo_u32 v14, v15, v1
-; CGP-NEXT:    v_mul_hi_u32 v16, v15, v0
-; CGP-NEXT:    v_mul_hi_u32 v0, v18, v0
-; CGP-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v16, v18, v1
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_mul_hi_u32 v14, v15, v1
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v14, vcc, v16, v14
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
-; CGP-NEXT:    v_mul_hi_u32 v16, v18, v1
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; CGP-NEXT:    v_add_i32_e32 v13, vcc, v16, v13
-; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2]
-; CGP-NEXT:    v_sub_i32_e32 v14, vcc, v15, v0
-; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v18, v13
-; CGP-NEXT:    v_subb_u32_e64 v15, s[4:5], v18, v13, vcc
-; CGP-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v14, v4
-; CGP-NEXT:    v_sub_i32_e32 v16, vcc, v14, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v15
-; CGP-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v0, vcc
-; CGP-NEXT:    v_mov_b32_e32 v0, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v13, -1, v1, s[4:5]
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1]
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v16, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, -1, vcc
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], -1, v8, v[0:1]
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v17
-; CGP-NEXT:    v_cndmask_b32_e32 v5, -1, v18, vcc
-; CGP-NEXT:    v_mul_lo_u32 v19, v8, v0
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v16, v4
-; CGP-NEXT:    v_subbrev_u32_e32 v18, vcc, 0, v17, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v16, v1, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v16, v17, v18, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v19
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v11, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; CGP-NEXT:    v_mul_hi_u32 v10, v8, v0
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v1
-; CGP-NEXT:    v_addc_u32_e32 v9, vcc, v9, v0, vcc
-; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v14, v5, vcc
-; CGP-NEXT:    v_xor_b32_e32 v11, v5, v7
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2]
-; CGP-NEXT:    v_cndmask_b32_e32 v10, v15, v16, vcc
-; CGP-NEXT:    v_xor_b32_e32 v1, v10, v7
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], -1, v8, v[5:6]
-; CGP-NEXT:    v_ashrrev_i32_e32 v10, 31, v3
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
-; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v10, vcc
-; CGP-NEXT:    v_xor_b32_e32 v12, v2, v10
-; CGP-NEXT:    v_mul_lo_u32 v2, v9, v0
-; CGP-NEXT:    v_mul_lo_u32 v6, v8, v5
-; CGP-NEXT:    v_xor_b32_e32 v13, v3, v10
-; CGP-NEXT:    v_mul_hi_u32 v3, v8, v0
-; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v3, v9, v5
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
-; CGP-NEXT:    v_mul_hi_u32 v6, v8, v5
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
-; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; CGP-NEXT:    v_mul_hi_u32 v5, v9, v5
-; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v8, v0
-; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v9, v2, vcc
-; CGP-NEXT:    v_mul_lo_u32 v5, v13, v3
-; CGP-NEXT:    v_mul_lo_u32 v6, v12, v2
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v11, v7
-; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v7, vcc
-; CGP-NEXT:    v_mul_hi_u32 v7, v12, v3
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_mul_lo_u32 v7, v13, v2
-; CGP-NEXT:    v_mul_hi_u32 v3, v13, v3
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT:    v_mul_hi_u32 v6, v12, v2
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; CGP-NEXT:    v_mul_hi_u32 v7, v13, v2
-; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4]
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v12, v2
-; CGP-NEXT:    v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
-; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v13, v5
-; CGP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v2, v4
-; CGP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v8, -1, v8, vcc
-; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v7, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
-; CGP-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v5, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
-; CGP-NEXT:    v_xor_b32_e32 v2, v2, v10
-; CGP-NEXT:    v_xor_b32_e32 v3, v3, v10
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
-; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v10, vcc
-; CGP-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-LABEL: v_srem_v2i64_oddk_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
+; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
+; CHECK-NEXT:    s_sub_u32 s6, 0, 0x12d8fb
+; CHECK-NEXT:    s_subb_u32 s7, 0, 0
+; CHECK-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CHECK-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CHECK-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
+; CHECK-NEXT:    v_trunc_f32_e32 v7, v5
+; CHECK-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v6, v4
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; CHECK-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], s6, v6, 0
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[5:6]
+; CHECK-NEXT:    v_mul_lo_u32 v5, v7, v4
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
+; CHECK-NEXT:    v_mul_hi_u32 v9, v6, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v7, v4
+; CHECK-NEXT:    v_mul_lo_u32 v10, v6, v8
+; CHECK-NEXT:    v_mul_lo_u32 v11, v7, v8
+; CHECK-NEXT:    v_mul_hi_u32 v12, v6, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v7, v8
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v6, v4
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
+; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v7, v5, vcc
+; CHECK-NEXT:    v_mov_b32_e32 v4, v9
+; CHECK-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s6, v5, v[4:5]
+; CHECK-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v10, v0, v4
+; CHECK-NEXT:    v_mul_lo_u32 v0, v5, v8
+; CHECK-NEXT:    v_mul_lo_u32 v12, v11, v9
+; CHECK-NEXT:    v_xor_b32_e32 v13, v1, v4
+; CHECK-NEXT:    v_mul_hi_u32 v1, v11, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v5, v8
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v1, v5, v9
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v12, v0
+; CHECK-NEXT:    v_mul_hi_u32 v12, v11, v9
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; CHECK-NEXT:    v_mul_hi_u32 v9, v5, v9
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
+; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v5, v1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v8, v13, v0
+; CHECK-NEXT:    v_mul_lo_u32 v9, v10, v1
+; CHECK-NEXT:    v_mul_hi_u32 v11, v10, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, v13, v0
+; CHECK-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v11, v13, v1
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT:    v_mul_hi_u32 v9, v10, v1
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v0, v8
+; CHECK-NEXT:    v_mul_hi_u32 v12, v13, v1
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v5, v8, v[1:2]
+; CHECK-NEXT:    v_sub_i32_e32 v10, vcc, v10, v0
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
+; CHECK-NEXT:    s_sub_u32 s6, 0, 0x12d8fb
+; CHECK-NEXT:    s_subb_u32 s7, 0, 0
+; CHECK-NEXT:    v_subb_u32_e64 v11, s[4:5], v13, v8, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v13, v8
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v5
+; CHECK-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
+; CHECK-NEXT:    v_sub_i32_e32 v13, vcc, v10, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, -1, v1, s[4:5]
+; CHECK-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v0, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v13, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v14
+; CHECK-NEXT:    v_cndmask_b32_e32 v15, -1, v8, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v13, v5
+; CHECK-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
+; CHECK-NEXT:    v_subbrev_u32_e32 v16, vcc, 0, v14, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v15
+; CHECK-NEXT:    v_cndmask_b32_e32 v9, v13, v1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v1, v7, v0
+; CHECK-NEXT:    v_mul_lo_u32 v13, v6, v8
+; CHECK-NEXT:    v_mul_hi_u32 v15, v6, v0
+; CHECK-NEXT:    v_cndmask_b32_e32 v14, v14, v16, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v0, v7, v0
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v15
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v15, v7, v8
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
+; CHECK-NEXT:    v_mul_hi_u32 v13, v6, v8
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
+; CHECK-NEXT:    v_mul_hi_u32 v8, v7, v8
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v6, v0
+; CHECK-NEXT:    v_addc_u32_e32 v13, vcc, v7, v1, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
+; CHECK-NEXT:    v_cndmask_b32_e32 v9, v10, v9, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
+; CHECK-NEXT:    v_xor_b32_e32 v1, v9, v4
+; CHECK-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
+; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
+; CHECK-NEXT:    v_cndmask_b32_e32 v10, v11, v14, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
+; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v11, v2, v9
+; CHECK-NEXT:    v_mul_lo_u32 v2, v13, v0
+; CHECK-NEXT:    v_mul_lo_u32 v7, v8, v6
+; CHECK-NEXT:    v_xor_b32_e32 v12, v3, v9
+; CHECK-NEXT:    v_mul_hi_u32 v3, v8, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, v13, v0
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v3, v13, v6
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, v8, v6
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; CHECK-NEXT:    v_mul_hi_u32 v6, v13, v6
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v13, v2, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v3, v12, v0
+; CHECK-NEXT:    v_mul_lo_u32 v6, v11, v2
+; CHECK-NEXT:    v_mul_hi_u32 v7, v11, v0
+; CHECK-NEXT:    v_mul_hi_u32 v0, v12, v0
+; CHECK-NEXT:    v_xor_b32_e32 v8, v10, v4
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v7, v12, v2
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
+; CHECK-NEXT:    v_mul_hi_u32 v6, v11, v2
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v0, v3
+; CHECK-NEXT:    v_mul_hi_u32 v7, v12, v2
+; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v0
+; CHECK-NEXT:    v_mov_b32_e32 v0, v3
+; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[0:1]
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
+; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v8, v4, vcc
+; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v11, v2
+; CHECK-NEXT:    v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v12, v3
+; CHECK-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v2, v5
+; CHECK-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v8, -1, v8, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, v7, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
+; CHECK-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
+; CHECK-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v10, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v2, v2, v9
+; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v9
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
+; CHECK-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = srem <2 x i64> %num, <i64 1235195, i64 1235195>
   ret <2 x i64> %result
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
index 158403644607abe..e5d236288c221c4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
@@ -234,68 +234,37 @@ define i32 @v_urem_i32_oddk_denom(i32 %num) {
 }
 
 define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) {
-; GISEL-LABEL: v_urem_v2i32_oddk_denom:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    v_mov_b32_e32 v2, 0x12d8fb
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, 0x12d8fb
-; GISEL-NEXT:    v_mov_b32_e32 v4, 0xffed2705
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
-; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GISEL-NEXT:    v_mul_lo_u32 v4, v3, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v3
-; GISEL-NEXT:    v_mul_hi_u32 v3, v1, v3
-; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v2
-; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
-; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v2
-; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v2
-; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_urem_v2i32_oddk_denom:
-; CGP:       ; %bb.0:
-; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, 0x4996c7d8
-; CGP-NEXT:    v_mov_b32_e32 v3, 0xffed2705
-; CGP-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
-; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
-; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; CGP-NEXT:    v_mul_lo_u32 v3, v2, v3
-; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; CGP-NEXT:    v_mul_hi_u32 v3, v0, v2
-; CGP-NEXT:    v_mul_hi_u32 v2, v1, v2
-; CGP-NEXT:    v_mul_lo_u32 v3, v3, v4
-; CGP-NEXT:    v_mul_lo_u32 v2, v2, v4
-; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
-; CGP-NEXT:    v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0
-; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, 0x12d8fb, v1
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v4
-; CGP-NEXT:    v_subrev_i32_e32 v3, vcc, 0x12d8fb, v1
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; CGP-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-LABEL: v_urem_v2i32_oddk_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0x12d8fb
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, 0x12d8fb
+; CHECK-NEXT:    v_mov_b32_e32 v4, 0xffed2705
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT:    v_mul_lo_u32 v4, v3, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v3, v4
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
+; CHECK-NEXT:    v_mul_hi_u32 v4, v0, v3
+; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
+; CHECK-NEXT:    v_mul_lo_u32 v4, v4, v2
+; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v2
+; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
+; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v2
+; CHECK-NEXT:    v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v2
+; CHECK-NEXT:    v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = urem <2 x i32> %num, <i32 1235195, i32 1235195>
   ret <2 x i32> %result
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index 12df4b7c7fc33d7..b58140f5c30b2c5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -1091,404 +1091,236 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) {
 }
 
 define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
-; GISEL-LABEL: v_urem_v2i64_oddk_denom:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, 0x12d8fb
-; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v6, 0
-; GISEL-NEXT:    s_sub_u32 s4, 0, 0x12d8fb
-; GISEL-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v6
-; GISEL-NEXT:    s_subb_u32 s5, 0, 0
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT:    s_sub_u32 s6, 0, 0x12d8fb
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; GISEL-NEXT:    s_subb_u32 s7, 0, 0
-; GISEL-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v5
-; GISEL-NEXT:    v_trunc_f32_e32 v6, v6
-; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v6
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT:    v_mul_lo_u32 v7, s4, v6
-; GISEL-NEXT:    v_mul_lo_u32 v8, s6, v6
-; GISEL-NEXT:    v_mul_lo_u32 v9, s4, v5
-; GISEL-NEXT:    v_mul_lo_u32 v10, s5, v5
-; GISEL-NEXT:    v_mul_hi_u32 v11, s4, v5
-; GISEL-NEXT:    v_mul_lo_u32 v12, s6, v5
-; GISEL-NEXT:    v_mul_lo_u32 v13, s7, v5
-; GISEL-NEXT:    v_mul_hi_u32 v14, s6, v5
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
-; GISEL-NEXT:    v_mul_lo_u32 v10, v6, v9
-; GISEL-NEXT:    v_mul_hi_u32 v15, v5, v9
-; GISEL-NEXT:    v_mul_hi_u32 v9, v6, v9
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v13, v8
-; GISEL-NEXT:    v_mul_lo_u32 v13, v6, v12
-; GISEL-NEXT:    v_mul_hi_u32 v16, v5, v12
-; GISEL-NEXT:    v_mul_hi_u32 v12, v6, v12
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v14
-; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v7
-; GISEL-NEXT:    v_mul_lo_u32 v14, v6, v7
-; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v7
-; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
-; GISEL-NEXT:    v_mul_lo_u32 v18, v5, v8
-; GISEL-NEXT:    v_mul_lo_u32 v19, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v20, v5, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v6, v8
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v19, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v14, v15
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v18, v13
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v19, v16
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v14, v13
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v5, v9
-; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v6, v7, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v10, s4, v9
-; GISEL-NEXT:    v_mul_lo_u32 v11, s5, v9
-; GISEL-NEXT:    v_mul_hi_u32 v13, s4, v9
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
-; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v8, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v8, s6, v5
-; GISEL-NEXT:    v_mul_lo_u32 v12, s7, v5
-; GISEL-NEXT:    v_mul_hi_u32 v14, s6, v5
-; GISEL-NEXT:    v_mul_lo_u32 v15, s4, v7
-; GISEL-NEXT:    v_mul_lo_u32 v16, v7, v10
-; GISEL-NEXT:    v_mul_hi_u32 v17, v9, v10
-; GISEL-NEXT:    v_mul_hi_u32 v10, v7, v10
-; GISEL-NEXT:    v_mul_lo_u32 v18, s6, v6
-; GISEL-NEXT:    v_mul_lo_u32 v19, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v20, v5, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v6, v8
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v15
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v18
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
-; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v11
-; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v11
-; GISEL-NEXT:    v_mul_hi_u32 v15, v9, v11
-; GISEL-NEXT:    v_mul_hi_u32 v11, v7, v11
-; GISEL-NEXT:    v_mul_lo_u32 v18, v5, v12
-; GISEL-NEXT:    v_mul_lo_u32 v21, v6, v12
-; GISEL-NEXT:    v_mul_hi_u32 v22, v5, v12
-; GISEL-NEXT:    v_mul_hi_u32 v12, v6, v12
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v16, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v14, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v19, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v21, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v18, v20
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v22
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v16, v13
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v19, v17
-; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v21, v18
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v16, v15
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v7, v11, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v9
-; GISEL-NEXT:    v_mul_hi_u32 v11, v0, v9
-; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
-; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v12, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v5
-; GISEL-NEXT:    v_mul_hi_u32 v12, v2, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
-; GISEL-NEXT:    v_mul_lo_u32 v13, v0, v7
-; GISEL-NEXT:    v_mul_lo_u32 v14, v1, v7
-; GISEL-NEXT:    v_mul_hi_u32 v15, v0, v7
-; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
-; GISEL-NEXT:    v_mul_lo_u32 v16, v2, v6
-; GISEL-NEXT:    v_mul_lo_u32 v17, v3, v6
-; GISEL-NEXT:    v_mul_hi_u32 v18, v2, v6
-; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v17, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v14, v11
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v16, v8
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v17, v12
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v4
-; GISEL-NEXT:    v_mul_hi_u32 v9, v4, v9
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
-; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v4
-; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
-; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v4
-; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v4
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
-; GISEL-NEXT:    v_subb_u32_e64 v6, vcc, v1, v7, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v2, s[6:7], v2, v12
-; GISEL-NEXT:    v_subb_u32_e64 v8, vcc, v3, v5, s[6:7]
-; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v3, v5
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v2, v4
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[8:9], 0, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, -1, v7, s[8:9]
-; GISEL-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, -1, v5, s[4:5]
-; GISEL-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[6:7]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
-; GISEL-NEXT:    s_mov_b64 s[4:5], vcc
-; GISEL-NEXT:    v_subrev_i32_e32 v11, vcc, 0x12d8fb, v9
-; GISEL-NEXT:    v_sub_i32_e64 v12, s[6:7], v0, v4
-; GISEL-NEXT:    v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7]
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v12, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[6:7]
-; GISEL-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, -1, v13, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v4, s[4:5], v12, v4
-; GISEL-NEXT:    v_subbrev_u32_e64 v14, s[4:5], 0, v1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, -1, v10, s[4:5]
-; GISEL-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, v12, v4, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v14, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, v15, s[4:5]
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v9, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, v8, v3, s[4:5]
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_urem_v2i64_oddk_denom:
-; CGP:       ; %bb.0:
-; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
-; CGP-NEXT:    v_cvt_f32_u32_e32 v5, 0x12d8fb
-; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v6, 0
-; CGP-NEXT:    v_mov_b32_e32 v7, 0xffed2705
-; CGP-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v6
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v5
-; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; CGP-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v5
-; CGP-NEXT:    v_trunc_f32_e32 v6, v6
-; CGP-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v6
-; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; CGP-NEXT:    v_mul_lo_u32 v8, v6, v7
-; CGP-NEXT:    v_mul_lo_u32 v9, v5, v7
-; CGP-NEXT:    v_mul_hi_u32 v10, v7, v5
-; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v8, v5
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
-; CGP-NEXT:    v_mul_lo_u32 v10, v6, v9
-; CGP-NEXT:    v_mul_hi_u32 v11, v5, v9
-; CGP-NEXT:    v_mul_hi_u32 v9, v6, v9
-; CGP-NEXT:    v_mul_lo_u32 v12, v5, v8
-; CGP-NEXT:    v_mul_lo_u32 v13, v6, v8
-; CGP-NEXT:    v_mul_hi_u32 v14, v5, v8
-; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v8, vcc
-; CGP-NEXT:    v_mul_lo_u32 v8, v5, v7
-; CGP-NEXT:    v_mul_hi_u32 v9, v7, v5
-; CGP-NEXT:    v_mul_lo_u32 v7, v6, v7
-; CGP-NEXT:    v_mul_lo_u32 v10, v6, v8
-; CGP-NEXT:    v_mul_hi_u32 v11, v5, v8
-; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
-; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v7, v5
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; CGP-NEXT:    v_mul_lo_u32 v9, v5, v7
-; CGP-NEXT:    v_mul_lo_u32 v12, v6, v7
-; CGP-NEXT:    v_mul_hi_u32 v13, v5, v7
-; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v13
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v11
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
-; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v7, vcc
-; CGP-NEXT:    v_mul_lo_u32 v7, v1, v5
-; CGP-NEXT:    v_mul_hi_u32 v8, v0, v5
-; CGP-NEXT:    v_mul_hi_u32 v9, v1, v5
-; CGP-NEXT:    v_mul_lo_u32 v10, v3, v5
-; CGP-NEXT:    v_mul_hi_u32 v11, v2, v5
-; CGP-NEXT:    v_mul_hi_u32 v5, v3, v5
-; CGP-NEXT:    v_mul_lo_u32 v12, v0, v6
-; CGP-NEXT:    v_mul_lo_u32 v13, v1, v6
-; CGP-NEXT:    v_mul_hi_u32 v14, v0, v6
-; CGP-NEXT:    v_mul_hi_u32 v15, v1, v6
-; CGP-NEXT:    v_mul_lo_u32 v16, v2, v6
-; CGP-NEXT:    v_mul_lo_u32 v17, v3, v6
-; CGP-NEXT:    v_mul_hi_u32 v18, v2, v6
-; CGP-NEXT:    v_mul_hi_u32 v6, v3, v6
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
-; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v16
-; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v17, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v14
-; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v18
-; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v12, v7
-; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v16, v10
-; CGP-NEXT:    v_add_i32_e32 v11, vcc, v17, v11
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; CGP-NEXT:    v_mul_lo_u32 v9, v7, v4
-; CGP-NEXT:    v_mul_hi_u32 v7, v4, v7
-; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT:    v_mul_lo_u32 v11, v5, v4
-; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
-; CGP-NEXT:    v_add_i32_e32 v8, vcc, v15, v8
-; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
-; CGP-NEXT:    v_mul_lo_u32 v8, v8, v4
-; CGP-NEXT:    v_mul_lo_u32 v6, v6, v4
-; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
-; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v9
-; CGP-NEXT:    v_subb_u32_e64 v6, vcc, v1, v7, s[4:5]
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
-; CGP-NEXT:    v_sub_i32_e64 v2, s[6:7], v2, v11
-; CGP-NEXT:    v_subb_u32_e64 v8, vcc, v3, v5, s[6:7]
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v3, v5
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
-; CGP-NEXT:    v_sub_i32_e32 v9, vcc, v2, v4
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[8:9], 0, v6
-; CGP-NEXT:    v_cndmask_b32_e64 v7, -1, v7, s[8:9]
-; CGP-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
-; CGP-NEXT:    v_cndmask_b32_e64 v5, -1, v5, s[4:5]
-; CGP-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[6:7]
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
-; CGP-NEXT:    s_mov_b64 s[4:5], vcc
-; CGP-NEXT:    v_subrev_i32_e32 v11, vcc, 0x12d8fb, v9
-; CGP-NEXT:    v_sub_i32_e64 v12, s[6:7], v0, v4
-; CGP-NEXT:    v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7]
-; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v12, v4
-; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[6:7]
-; CGP-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v1
-; CGP-NEXT:    v_cndmask_b32_e64 v13, -1, v13, s[4:5]
-; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v12, v4
-; CGP-NEXT:    v_subbrev_u32_e64 v14, s[4:5], 0, v1, s[4:5]
-; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
-; CGP-NEXT:    v_cndmask_b32_e64 v10, -1, v10, s[4:5]
-; CGP-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v3, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
-; CGP-NEXT:    v_cndmask_b32_e32 v4, v12, v4, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v10
-; CGP-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[4:5]
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v14, vcc
-; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v3, v3, v15, s[4:5]
-; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
-; CGP-NEXT:    v_cndmask_b32_e64 v2, v2, v9, s[4:5]
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
-; CGP-NEXT:    v_cndmask_b32_e64 v3, v8, v3, s[4:5]
-; CGP-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-LABEL: v_urem_v2i64_oddk_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, 0x12d8fb
+; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v6, 0
+; CHECK-NEXT:    s_sub_u32 s4, 0, 0x12d8fb
+; CHECK-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v6
+; CHECK-NEXT:    s_subb_u32 s5, 0, 0
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; CHECK-NEXT:    s_sub_u32 s6, 0, 0x12d8fb
+; CHECK-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; CHECK-NEXT:    s_subb_u32 s7, 0, 0
+; CHECK-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v5
+; CHECK-NEXT:    v_trunc_f32_e32 v6, v6
+; CHECK-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v6
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CHECK-NEXT:    v_mul_lo_u32 v7, s4, v6
+; CHECK-NEXT:    v_mul_lo_u32 v8, s6, v6
+; CHECK-NEXT:    v_mul_lo_u32 v9, s4, v5
+; CHECK-NEXT:    v_mul_lo_u32 v10, s5, v5
+; CHECK-NEXT:    v_mul_hi_u32 v11, s4, v5
+; CHECK-NEXT:    v_mul_lo_u32 v12, s6, v5
+; CHECK-NEXT:    v_mul_lo_u32 v13, s7, v5
+; CHECK-NEXT:    v_mul_hi_u32 v14, s6, v5
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
+; CHECK-NEXT:    v_mul_lo_u32 v10, v6, v9
+; CHECK-NEXT:    v_mul_hi_u32 v15, v5, v9
+; CHECK-NEXT:    v_mul_hi_u32 v9, v6, v9
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v13, v8
+; CHECK-NEXT:    v_mul_lo_u32 v13, v6, v12
+; CHECK-NEXT:    v_mul_hi_u32 v16, v5, v12
+; CHECK-NEXT:    v_mul_hi_u32 v12, v6, v12
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v14
+; CHECK-NEXT:    v_mul_lo_u32 v11, v5, v7
+; CHECK-NEXT:    v_mul_lo_u32 v14, v6, v7
+; CHECK-NEXT:    v_mul_hi_u32 v17, v5, v7
+; CHECK-NEXT:    v_mul_hi_u32 v7, v6, v7
+; CHECK-NEXT:    v_mul_lo_u32 v18, v5, v8
+; CHECK-NEXT:    v_mul_lo_u32 v19, v6, v8
+; CHECK-NEXT:    v_mul_hi_u32 v20, v5, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v6, v8
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
+; CHECK-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v19, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v17
+; CHECK-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v20
+; CHECK-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v14, v15
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v18, v13
+; CHECK-NEXT:    v_add_i32_e32 v14, vcc, v19, v16
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v14, v13
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v5, v9
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, v6, v7, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v10, s4, v9
+; CHECK-NEXT:    v_mul_lo_u32 v11, s5, v9
+; CHECK-NEXT:    v_mul_hi_u32 v13, s4, v9
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v12
+; CHECK-NEXT:    v_addc_u32_e32 v6, vcc, v6, v8, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v8, s6, v5
+; CHECK-NEXT:    v_mul_lo_u32 v12, s7, v5
+; CHECK-NEXT:    v_mul_hi_u32 v14, s6, v5
+; CHECK-NEXT:    v_mul_lo_u32 v15, s4, v7
+; CHECK-NEXT:    v_mul_lo_u32 v16, v7, v10
+; CHECK-NEXT:    v_mul_hi_u32 v17, v9, v10
+; CHECK-NEXT:    v_mul_hi_u32 v10, v7, v10
+; CHECK-NEXT:    v_mul_lo_u32 v18, s6, v6
+; CHECK-NEXT:    v_mul_lo_u32 v19, v6, v8
+; CHECK-NEXT:    v_mul_hi_u32 v20, v5, v8
+; CHECK-NEXT:    v_mul_hi_u32 v8, v6, v8
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v11, v15
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v18
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; CHECK-NEXT:    v_mul_lo_u32 v13, v9, v11
+; CHECK-NEXT:    v_mul_lo_u32 v14, v7, v11
+; CHECK-NEXT:    v_mul_hi_u32 v15, v9, v11
+; CHECK-NEXT:    v_mul_hi_u32 v11, v7, v11
+; CHECK-NEXT:    v_mul_lo_u32 v18, v5, v12
+; CHECK-NEXT:    v_mul_lo_u32 v21, v6, v12
+; CHECK-NEXT:    v_mul_hi_u32 v22, v5, v12
+; CHECK-NEXT:    v_mul_hi_u32 v12, v6, v12
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v16, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v14, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v18, vcc, v19, v18
+; CHECK-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v21, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v21, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
+; CHECK-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v17, vcc, v18, v20
+; CHECK-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v22
+; CHECK-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v16, v13
+; CHECK-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
+; CHECK-NEXT:    v_add_i32_e32 v15, vcc, v19, v17
+; CHECK-NEXT:    v_add_i32_e32 v16, vcc, v21, v18
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v15
+; CHECK-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
+; CHECK-NEXT:    v_add_i32_e32 v14, vcc, v16, v15
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, v7, v11, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v10, v1, v9
+; CHECK-NEXT:    v_mul_hi_u32 v11, v0, v9
+; CHECK-NEXT:    v_mul_hi_u32 v9, v1, v9
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; CHECK-NEXT:    v_addc_u32_e32 v6, vcc, v6, v12, vcc
+; CHECK-NEXT:    v_mul_lo_u32 v8, v3, v5
+; CHECK-NEXT:    v_mul_hi_u32 v12, v2, v5
+; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v5
+; CHECK-NEXT:    v_mul_lo_u32 v13, v0, v7
+; CHECK-NEXT:    v_mul_lo_u32 v14, v1, v7
+; CHECK-NEXT:    v_mul_hi_u32 v15, v0, v7
+; CHECK-NEXT:    v_mul_hi_u32 v7, v1, v7
+; CHECK-NEXT:    v_mul_lo_u32 v16, v2, v6
+; CHECK-NEXT:    v_mul_lo_u32 v17, v3, v6
+; CHECK-NEXT:    v_mul_hi_u32 v18, v2, v6
+; CHECK-NEXT:    v_mul_hi_u32 v6, v3, v6
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
+; CHECK-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v16
+; CHECK-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v17, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
+; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v18
+; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
+; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v14, v11
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v16, v8
+; CHECK-NEXT:    v_add_i32_e32 v12, vcc, v17, v12
+; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
+; CHECK-NEXT:    v_mul_lo_u32 v11, v9, v4
+; CHECK-NEXT:    v_mul_hi_u32 v9, v4, v9
+; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
+; CHECK-NEXT:    v_mul_lo_u32 v12, v5, v4
+; CHECK-NEXT:    v_mul_hi_u32 v5, v4, v5
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
+; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v4
+; CHECK-NEXT:    v_mul_lo_u32 v6, v6, v4
+; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
+; CHECK-NEXT:    v_subb_u32_e64 v6, vcc, v1, v7, s[4:5]
+; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v2, s[6:7], v2, v12
+; CHECK-NEXT:    v_subb_u32_e64 v8, vcc, v3, v5, s[6:7]
+; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v3, v5
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v9, vcc, v2, v4
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[8:9], 0, v6
+; CHECK-NEXT:    v_cndmask_b32_e64 v7, -1, v7, s[8:9]
+; CHECK-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
+; CHECK-NEXT:    v_cndmask_b32_e64 v5, -1, v5, s[4:5]
+; CHECK-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[6:7]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
+; CHECK-NEXT:    s_mov_b64 s[4:5], vcc
+; CHECK-NEXT:    v_subrev_i32_e32 v11, vcc, 0x12d8fb, v9
+; CHECK-NEXT:    v_sub_i32_e64 v12, s[6:7], v0, v4
+; CHECK-NEXT:    v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7]
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[6:7], v12, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[6:7]
+; CHECK-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v13, -1, v13, s[4:5]
+; CHECK-NEXT:    v_sub_i32_e64 v4, s[4:5], v12, v4
+; CHECK-NEXT:    v_subbrev_u32_e64 v14, s[4:5], 0, v1, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
+; CHECK-NEXT:    v_cndmask_b32_e64 v10, -1, v10, s[4:5]
+; CHECK-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v3, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
+; CHECK-NEXT:    v_cndmask_b32_e32 v4, v12, v4, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v10
+; CHECK-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v14, vcc
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, v3, v15, s[4:5]
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, v2, v9, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v3, v8, v3, s[4:5]
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = urem <2 x i64> %num, <i64 1235195, i64 1235195>
   ret <2 x i64> %result
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
index a0b2d3b32b7957f..ed2aaaa9ee411a9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
@@ -6653,28 +6653,50 @@ define <2 x half> @v_log_v2f16(<2 x half> %in) {
 ; VI-GISEL-NEXT:    v_or_b32_e32 v0, v1, v0
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-LABEL: v_log_v2f16:
-; GFX900:       ; %bb.0:
-; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_log_f16_e32 v1, v0
-; GFX900-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
-; GFX900-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
-; GFX900-NEXT:    v_pack_b32_f16 v0, v1, v0
-; GFX900-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_log_v2f16:
+; GFX900-SDAG:       ; %bb.0:
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT:    v_log_f16_e32 v1, v0
+; GFX900-SDAG-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
+; GFX900-SDAG-NEXT:    v_pack_b32_f16 v0, v1, v0
+; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_log_v2f16:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX1100-NEXT:    v_log_f16_e32 v0, v0
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX1100-NEXT:    v_log_f16_e32 v1, v1
-; GFX1100-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
-; GFX1100-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
-; GFX1100-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-GISEL-LABEL: v_log_v2f16:
+; GFX900-GISEL:       ; %bb.0:
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_log_f16_e32 v1, v0
+; GFX900-GISEL-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-GISEL-NEXT:    v_pack_b32_f16 v0, v1, v0
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x398c398c
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v1
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log_v2f16:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v0, v0
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v1, v1
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
+; GFX1100-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log_v2f16:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v0, v0
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v1, v1
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v0, 0x398c, v0 op_sel_hi:[0,1]
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log_v2f16:
 ; R600:       ; %bb.0:
@@ -6764,9 +6786,9 @@ define <2 x half> @v_log_fabs_v2f16(<2 x half> %in) {
 ; GFX900-GISEL-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
 ; GFX900-GISEL-NEXT:    v_log_f16_e32 v1, v0
 ; GFX900-GISEL-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
 ; GFX900-GISEL-NEXT:    v_pack_b32_f16 v0, v1, v0
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x398c398c
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v1
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-SDAG-LABEL: v_log_fabs_v2f16:
@@ -6791,10 +6813,9 @@ define <2 x half> @v_log_fabs_v2f16(<2 x half> %in) {
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v0, v0
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v1, v1
 ; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
-; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v0, 0x398c, v0 op_sel_hi:[0,1]
 ; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log_fabs_v2f16:
@@ -6890,9 +6911,9 @@ define <2 x half> @v_log_fneg_fabs_v2f16(<2 x half> %in) {
 ; GFX900-GISEL-NEXT:    v_or_b32_e32 v0, 0x80008000, v0
 ; GFX900-GISEL-NEXT:    v_log_f16_e32 v1, v0
 ; GFX900-GISEL-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
 ; GFX900-GISEL-NEXT:    v_pack_b32_f16 v0, v1, v0
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x398c398c
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v1
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-SDAG-LABEL: v_log_fneg_fabs_v2f16:
@@ -6917,10 +6938,9 @@ define <2 x half> @v_log_fneg_fabs_v2f16(<2 x half> %in) {
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v0, v0
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v1, v1
 ; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
-; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v0, 0x398c, v0 op_sel_hi:[0,1]
 ; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log_fneg_fabs_v2f16:
@@ -7017,9 +7037,9 @@ define <2 x half> @v_log_fneg_v2f16(<2 x half> %in) {
 ; GFX900-GISEL-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
 ; GFX900-GISEL-NEXT:    v_log_f16_e32 v1, v0
 ; GFX900-GISEL-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
 ; GFX900-GISEL-NEXT:    v_pack_b32_f16 v0, v1, v0
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x398c398c
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v1
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-SDAG-LABEL: v_log_fneg_v2f16:
@@ -7044,10 +7064,9 @@ define <2 x half> @v_log_fneg_v2f16(<2 x half> %in) {
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v0, v0
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v1, v1
 ; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
-; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v0, 0x398c, v0 op_sel_hi:[0,1]
 ; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log_fneg_v2f16:
@@ -7117,28 +7136,50 @@ define <2 x half> @v_log_v2f16_fast(<2 x half> %in) {
 ; VI-GISEL-NEXT:    v_or_b32_e32 v0, v1, v0
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-LABEL: v_log_v2f16_fast:
-; GFX900:       ; %bb.0:
-; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_log_f16_e32 v1, v0
-; GFX900-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
-; GFX900-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
-; GFX900-NEXT:    v_pack_b32_f16 v0, v1, v0
-; GFX900-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_log_v2f16_fast:
+; GFX900-SDAG:       ; %bb.0:
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT:    v_log_f16_e32 v1, v0
+; GFX900-SDAG-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
+; GFX900-SDAG-NEXT:    v_pack_b32_f16 v0, v1, v0
+; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_log_v2f16_fast:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX1100-NEXT:    v_log_f16_e32 v0, v0
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX1100-NEXT:    v_log_f16_e32 v1, v1
-; GFX1100-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
-; GFX1100-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
-; GFX1100-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-GISEL-LABEL: v_log_v2f16_fast:
+; GFX900-GISEL:       ; %bb.0:
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_log_f16_e32 v1, v0
+; GFX900-GISEL-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-GISEL-NEXT:    v_pack_b32_f16 v0, v1, v0
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x398c398c
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v1
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log_v2f16_fast:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v0, v0
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v1, v1
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
+; GFX1100-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log_v2f16_fast:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v0, v0
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v1, v1
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v0, 0x398c, v0 op_sel_hi:[0,1]
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log_v2f16_fast:
 ; R600:       ; %bb.0:
@@ -7207,33 +7248,59 @@ define <3 x half> @v_log_v3f16(<3 x half> %in) {
 ; VI-NEXT:    v_or_b32_e32 v0, v2, v0
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-LABEL: v_log_v3f16:
-; GFX900:       ; %bb.0:
-; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_log_f16_e32 v2, v0
-; GFX900-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-NEXT:    v_log_f16_e32 v1, v1
-; GFX900-NEXT:    v_mul_f16_e32 v2, 0x398c, v2
-; GFX900-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
-; GFX900-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
-; GFX900-NEXT:    v_pack_b32_f16 v0, v2, v0
-; GFX900-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_log_v3f16:
+; GFX900-SDAG:       ; %bb.0:
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT:    v_log_f16_e32 v2, v0
+; GFX900-SDAG-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-SDAG-NEXT:    v_log_f16_e32 v1, v1
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v2, 0x398c, v2
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
+; GFX900-SDAG-NEXT:    v_pack_b32_f16 v0, v2, v0
+; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_log_v3f16:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
-; GFX1100-NEXT:    v_log_f16_e32 v0, v0
-; GFX1100-NEXT:    v_log_f16_e32 v1, v1
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_3)
-; GFX1100-NEXT:    v_log_f16_e32 v2, v2
-; GFX1100-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
-; GFX1100-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
-; GFX1100-NEXT:    v_mul_f16_e32 v2, 0x398c, v2
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT:    v_pack_b32_f16 v0, v0, v2
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-GISEL-LABEL: v_log_v3f16:
+; GFX900-GISEL:       ; %bb.0:
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_log_f16_e32 v2, v0
+; GFX900-GISEL-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-GISEL-NEXT:    v_log_f16_e32 v1, v1
+; GFX900-GISEL-NEXT:    v_pack_b32_f16 v0, v2, v0
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x398c398c
+; GFX900-GISEL-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v2
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log_v3f16:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v0, v0
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v1, v1
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_3)
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v2, v2
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v2, 0x398c, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v2
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log_v3f16:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v0, v0
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v1, v1
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v2, v2
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
+; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v2
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v0, 0x398c, v0 op_sel_hi:[0,1]
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log_v3f16:
 ; R600:       ; %bb.0:
@@ -7302,33 +7369,59 @@ define <3 x half> @v_log_v3f16_fast(<3 x half> %in) {
 ; VI-NEXT:    v_or_b32_e32 v0, v2, v0
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-LABEL: v_log_v3f16_fast:
-; GFX900:       ; %bb.0:
-; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_log_f16_e32 v2, v0
-; GFX900-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-NEXT:    v_log_f16_e32 v1, v1
-; GFX900-NEXT:    v_mul_f16_e32 v2, 0x398c, v2
-; GFX900-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
-; GFX900-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
-; GFX900-NEXT:    v_pack_b32_f16 v0, v2, v0
-; GFX900-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_log_v3f16_fast:
+; GFX900-SDAG:       ; %bb.0:
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT:    v_log_f16_e32 v2, v0
+; GFX900-SDAG-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-SDAG-NEXT:    v_log_f16_e32 v1, v1
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v2, 0x398c, v2
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
+; GFX900-SDAG-NEXT:    v_pack_b32_f16 v0, v2, v0
+; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_log_v3f16_fast:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
-; GFX1100-NEXT:    v_log_f16_e32 v0, v0
-; GFX1100-NEXT:    v_log_f16_e32 v1, v1
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_3)
-; GFX1100-NEXT:    v_log_f16_e32 v2, v2
-; GFX1100-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
-; GFX1100-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
-; GFX1100-NEXT:    v_mul_f16_e32 v2, 0x398c, v2
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT:    v_pack_b32_f16 v0, v0, v2
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-GISEL-LABEL: v_log_v3f16_fast:
+; GFX900-GISEL:       ; %bb.0:
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_log_f16_e32 v2, v0
+; GFX900-GISEL-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-GISEL-NEXT:    v_log_f16_e32 v1, v1
+; GFX900-GISEL-NEXT:    v_pack_b32_f16 v0, v2, v0
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x398c398c
+; GFX900-GISEL-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v2
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log_v3f16_fast:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v0, v0
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v1, v1
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_3)
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v2, v2
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v2, 0x398c, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v2
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log_v3f16_fast:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v0, v0
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v1, v1
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v2, v2
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
+; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v2
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v0, 0x398c, v0 op_sel_hi:[0,1]
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log_v3f16_fast:
 ; R600:       ; %bb.0:
@@ -7448,12 +7541,11 @@ define <4 x half> @v_log_v4f16(<4 x half> %in) {
 ; GFX900-GISEL-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; GFX900-GISEL-NEXT:    v_log_f16_e32 v3, v1
 ; GFX900-GISEL-NEXT:    v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v2, 0x398c, v2
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v3, 0x398c, v3
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
 ; GFX900-GISEL-NEXT:    v_pack_b32_f16 v0, v2, v0
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x398c398c
 ; GFX900-GISEL-NEXT:    v_pack_b32_f16 v1, v3, v1
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v2
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v1, v1, v2
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-SDAG-LABEL: v_log_v4f16:
@@ -7487,15 +7579,12 @@ define <4 x half> @v_log_v4f16(<4 x half> %in) {
 ; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v2, v2
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v3, v3
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
-; GFX1100-GISEL-NEXT:    s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
 ; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v2, 0x398c, v2
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v3, 0x398c, v3
 ; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v2
-; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v1, v1, v3
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v0, 0x398c, v0 op_sel_hi:[0,1]
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v1, 0x398c, v1 op_sel_hi:[0,1]
 ; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log_v4f16:
@@ -7616,12 +7705,11 @@ define <4 x half> @v_log_v4f16_fast(<4 x half> %in) {
 ; GFX900-GISEL-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; GFX900-GISEL-NEXT:    v_log_f16_e32 v3, v1
 ; GFX900-GISEL-NEXT:    v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v2, 0x398c, v2
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v3, 0x398c, v3
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
 ; GFX900-GISEL-NEXT:    v_pack_b32_f16 v0, v2, v0
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x398c398c
 ; GFX900-GISEL-NEXT:    v_pack_b32_f16 v1, v3, v1
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v2
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v1, v1, v2
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-SDAG-LABEL: v_log_v4f16_fast:
@@ -7655,15 +7743,12 @@ define <4 x half> @v_log_v4f16_fast(<4 x half> %in) {
 ; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v2, v2
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v3, v3
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v0, 0x398c, v0
-; GFX1100-GISEL-NEXT:    s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v1, 0x398c, v1
 ; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v2, 0x398c, v2
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v3, 0x398c, v3
 ; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v2
-; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v1, v1, v3
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v0, 0x398c, v0 op_sel_hi:[0,1]
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v1, 0x398c, v1 op_sel_hi:[0,1]
 ; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log_v4f16_fast:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
index 5ba72612321a6ad..0d47e557101b0bc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
@@ -6653,28 +6653,50 @@ define <2 x half> @v_log10_v2f16(<2 x half> %in) {
 ; VI-GISEL-NEXT:    v_or_b32_e32 v0, v1, v0
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-LABEL: v_log10_v2f16:
-; GFX900:       ; %bb.0:
-; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_log_f16_e32 v1, v0
-; GFX900-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
-; GFX900-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
-; GFX900-NEXT:    v_pack_b32_f16 v0, v1, v0
-; GFX900-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_log10_v2f16:
+; GFX900-SDAG:       ; %bb.0:
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT:    v_log_f16_e32 v1, v0
+; GFX900-SDAG-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
+; GFX900-SDAG-NEXT:    v_pack_b32_f16 v0, v1, v0
+; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_log10_v2f16:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX1100-NEXT:    v_log_f16_e32 v0, v0
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX1100-NEXT:    v_log_f16_e32 v1, v1
-; GFX1100-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
-; GFX1100-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
-; GFX1100-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-GISEL-LABEL: v_log10_v2f16:
+; GFX900-GISEL:       ; %bb.0:
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_log_f16_e32 v1, v0
+; GFX900-GISEL-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-GISEL-NEXT:    v_pack_b32_f16 v0, v1, v0
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x34d134d1
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v1
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log10_v2f16:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v0, v0
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v1, v1
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
+; GFX1100-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log10_v2f16:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v0, v0
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v1, v1
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v0, 0x34d1, v0 op_sel_hi:[0,1]
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log10_v2f16:
 ; R600:       ; %bb.0:
@@ -6764,9 +6786,9 @@ define <2 x half> @v_log10_fabs_v2f16(<2 x half> %in) {
 ; GFX900-GISEL-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
 ; GFX900-GISEL-NEXT:    v_log_f16_e32 v1, v0
 ; GFX900-GISEL-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
 ; GFX900-GISEL-NEXT:    v_pack_b32_f16 v0, v1, v0
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x34d134d1
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v1
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-SDAG-LABEL: v_log10_fabs_v2f16:
@@ -6791,10 +6813,9 @@ define <2 x half> @v_log10_fabs_v2f16(<2 x half> %in) {
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v0, v0
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v1, v1
 ; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
-; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v0, 0x34d1, v0 op_sel_hi:[0,1]
 ; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log10_fabs_v2f16:
@@ -6890,9 +6911,9 @@ define <2 x half> @v_log10_fneg_fabs_v2f16(<2 x half> %in) {
 ; GFX900-GISEL-NEXT:    v_or_b32_e32 v0, 0x80008000, v0
 ; GFX900-GISEL-NEXT:    v_log_f16_e32 v1, v0
 ; GFX900-GISEL-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
 ; GFX900-GISEL-NEXT:    v_pack_b32_f16 v0, v1, v0
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x34d134d1
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v1
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-SDAG-LABEL: v_log10_fneg_fabs_v2f16:
@@ -6917,10 +6938,9 @@ define <2 x half> @v_log10_fneg_fabs_v2f16(<2 x half> %in) {
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v0, v0
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v1, v1
 ; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
-; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v0, 0x34d1, v0 op_sel_hi:[0,1]
 ; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log10_fneg_fabs_v2f16:
@@ -7017,9 +7037,9 @@ define <2 x half> @v_log10_fneg_v2f16(<2 x half> %in) {
 ; GFX900-GISEL-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
 ; GFX900-GISEL-NEXT:    v_log_f16_e32 v1, v0
 ; GFX900-GISEL-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
 ; GFX900-GISEL-NEXT:    v_pack_b32_f16 v0, v1, v0
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x34d134d1
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v1
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-SDAG-LABEL: v_log10_fneg_v2f16:
@@ -7044,10 +7064,9 @@ define <2 x half> @v_log10_fneg_v2f16(<2 x half> %in) {
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v0, v0
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v1, v1
 ; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
-; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v0, 0x34d1, v0 op_sel_hi:[0,1]
 ; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log10_fneg_v2f16:
@@ -7117,28 +7136,50 @@ define <2 x half> @v_log10_v2f16_fast(<2 x half> %in) {
 ; VI-GISEL-NEXT:    v_or_b32_e32 v0, v1, v0
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-LABEL: v_log10_v2f16_fast:
-; GFX900:       ; %bb.0:
-; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_log_f16_e32 v1, v0
-; GFX900-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
-; GFX900-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
-; GFX900-NEXT:    v_pack_b32_f16 v0, v1, v0
-; GFX900-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_log10_v2f16_fast:
+; GFX900-SDAG:       ; %bb.0:
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT:    v_log_f16_e32 v1, v0
+; GFX900-SDAG-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
+; GFX900-SDAG-NEXT:    v_pack_b32_f16 v0, v1, v0
+; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_log10_v2f16_fast:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX1100-NEXT:    v_log_f16_e32 v0, v0
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX1100-NEXT:    v_log_f16_e32 v1, v1
-; GFX1100-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
-; GFX1100-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
-; GFX1100-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-GISEL-LABEL: v_log10_v2f16_fast:
+; GFX900-GISEL:       ; %bb.0:
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_log_f16_e32 v1, v0
+; GFX900-GISEL-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-GISEL-NEXT:    v_pack_b32_f16 v0, v1, v0
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x34d134d1
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v1
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log10_v2f16_fast:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v0, v0
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v1, v1
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
+; GFX1100-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log10_v2f16_fast:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v0, v0
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v1, v1
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v0, 0x34d1, v0 op_sel_hi:[0,1]
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log10_v2f16_fast:
 ; R600:       ; %bb.0:
@@ -7207,33 +7248,59 @@ define <3 x half> @v_log10_v3f16(<3 x half> %in) {
 ; VI-NEXT:    v_or_b32_e32 v0, v2, v0
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-LABEL: v_log10_v3f16:
-; GFX900:       ; %bb.0:
-; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_log_f16_e32 v2, v0
-; GFX900-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-NEXT:    v_log_f16_e32 v1, v1
-; GFX900-NEXT:    v_mul_f16_e32 v2, 0x34d1, v2
-; GFX900-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
-; GFX900-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
-; GFX900-NEXT:    v_pack_b32_f16 v0, v2, v0
-; GFX900-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_log10_v3f16:
+; GFX900-SDAG:       ; %bb.0:
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT:    v_log_f16_e32 v2, v0
+; GFX900-SDAG-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-SDAG-NEXT:    v_log_f16_e32 v1, v1
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v2, 0x34d1, v2
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
+; GFX900-SDAG-NEXT:    v_pack_b32_f16 v0, v2, v0
+; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_log10_v3f16:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
-; GFX1100-NEXT:    v_log_f16_e32 v0, v0
-; GFX1100-NEXT:    v_log_f16_e32 v1, v1
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_3)
-; GFX1100-NEXT:    v_log_f16_e32 v2, v2
-; GFX1100-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
-; GFX1100-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
-; GFX1100-NEXT:    v_mul_f16_e32 v2, 0x34d1, v2
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT:    v_pack_b32_f16 v0, v0, v2
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-GISEL-LABEL: v_log10_v3f16:
+; GFX900-GISEL:       ; %bb.0:
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_log_f16_e32 v2, v0
+; GFX900-GISEL-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-GISEL-NEXT:    v_log_f16_e32 v1, v1
+; GFX900-GISEL-NEXT:    v_pack_b32_f16 v0, v2, v0
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x34d134d1
+; GFX900-GISEL-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v2
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log10_v3f16:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v0, v0
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v1, v1
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_3)
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v2, v2
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v2, 0x34d1, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v2
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log10_v3f16:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v0, v0
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v1, v1
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v2, v2
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
+; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v2
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v0, 0x34d1, v0 op_sel_hi:[0,1]
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log10_v3f16:
 ; R600:       ; %bb.0:
@@ -7302,33 +7369,59 @@ define <3 x half> @v_log10_v3f16_fast(<3 x half> %in) {
 ; VI-NEXT:    v_or_b32_e32 v0, v2, v0
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-LABEL: v_log10_v3f16_fast:
-; GFX900:       ; %bb.0:
-; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_log_f16_e32 v2, v0
-; GFX900-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-NEXT:    v_log_f16_e32 v1, v1
-; GFX900-NEXT:    v_mul_f16_e32 v2, 0x34d1, v2
-; GFX900-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
-; GFX900-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
-; GFX900-NEXT:    v_pack_b32_f16 v0, v2, v0
-; GFX900-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_log10_v3f16_fast:
+; GFX900-SDAG:       ; %bb.0:
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT:    v_log_f16_e32 v2, v0
+; GFX900-SDAG-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-SDAG-NEXT:    v_log_f16_e32 v1, v1
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v2, 0x34d1, v2
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
+; GFX900-SDAG-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
+; GFX900-SDAG-NEXT:    v_pack_b32_f16 v0, v2, v0
+; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1100-LABEL: v_log10_v3f16_fast:
-; GFX1100:       ; %bb.0:
-; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
-; GFX1100-NEXT:    v_log_f16_e32 v0, v0
-; GFX1100-NEXT:    v_log_f16_e32 v1, v1
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_3)
-; GFX1100-NEXT:    v_log_f16_e32 v2, v2
-; GFX1100-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
-; GFX1100-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
-; GFX1100-NEXT:    v_mul_f16_e32 v2, 0x34d1, v2
-; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-NEXT:    v_pack_b32_f16 v0, v0, v2
-; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-GISEL-LABEL: v_log10_v3f16_fast:
+; GFX900-GISEL:       ; %bb.0:
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_log_f16_e32 v2, v0
+; GFX900-GISEL-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX900-GISEL-NEXT:    v_log_f16_e32 v1, v1
+; GFX900-GISEL-NEXT:    v_pack_b32_f16 v0, v2, v0
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x34d134d1
+; GFX900-GISEL-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v2
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-SDAG-LABEL: v_log10_v3f16_fast:
+; GFX1100-SDAG:       ; %bb.0:
+; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v0, v0
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v1, v1
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_3)
+; GFX1100-SDAG-NEXT:    v_log_f16_e32 v2, v2
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
+; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
+; GFX1100-SDAG-NEXT:    v_mul_f16_e32 v2, 0x34d1, v2
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v2
+; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1100-GISEL-LABEL: v_log10_v3f16_fast:
+; GFX1100-GISEL:       ; %bb.0:
+; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v0, v0
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v1, v1
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GFX1100-GISEL-NEXT:    v_log_f16_e32 v2, v2
+; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
+; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
+; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v2
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v0, 0x34d1, v0 op_sel_hi:[0,1]
+; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log10_v3f16_fast:
 ; R600:       ; %bb.0:
@@ -7448,12 +7541,11 @@ define <4 x half> @v_log10_v4f16(<4 x half> %in) {
 ; GFX900-GISEL-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; GFX900-GISEL-NEXT:    v_log_f16_e32 v3, v1
 ; GFX900-GISEL-NEXT:    v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v2, 0x34d1, v2
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v3, 0x34d1, v3
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
 ; GFX900-GISEL-NEXT:    v_pack_b32_f16 v0, v2, v0
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x34d134d1
 ; GFX900-GISEL-NEXT:    v_pack_b32_f16 v1, v3, v1
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v2
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v1, v1, v2
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-SDAG-LABEL: v_log10_v4f16:
@@ -7487,15 +7579,12 @@ define <4 x half> @v_log10_v4f16(<4 x half> %in) {
 ; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v2, v2
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v3, v3
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
-; GFX1100-GISEL-NEXT:    s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
 ; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v2, 0x34d1, v2
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v3, 0x34d1, v3
 ; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v2
-; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v1, v1, v3
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v0, 0x34d1, v0 op_sel_hi:[0,1]
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v1, 0x34d1, v1 op_sel_hi:[0,1]
 ; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log10_v4f16:
@@ -7616,12 +7705,11 @@ define <4 x half> @v_log10_v4f16_fast(<4 x half> %in) {
 ; GFX900-GISEL-NEXT:    v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; GFX900-GISEL-NEXT:    v_log_f16_e32 v3, v1
 ; GFX900-GISEL-NEXT:    v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v2, 0x34d1, v2
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v3, 0x34d1, v3
-; GFX900-GISEL-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
 ; GFX900-GISEL-NEXT:    v_pack_b32_f16 v0, v2, v0
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x34d134d1
 ; GFX900-GISEL-NEXT:    v_pack_b32_f16 v1, v3, v1
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v2
+; GFX900-GISEL-NEXT:    v_pk_mul_f16 v1, v1, v2
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-SDAG-LABEL: v_log10_v4f16_fast:
@@ -7655,15 +7743,12 @@ define <4 x half> @v_log10_v4f16_fast(<4 x half> %in) {
 ; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v2, v2
 ; GFX1100-GISEL-NEXT:    v_log_f16_e32 v3, v3
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v0, 0x34d1, v0
-; GFX1100-GISEL-NEXT:    s_delay_alu instid0(TRANS32_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v1, 0x34d1, v1
 ; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v2, 0x34d1, v2
-; GFX1100-GISEL-NEXT:    v_mul_f16_e32 v3, 0x34d1, v3
 ; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v2
-; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX1100-GISEL-NEXT:    v_pack_b32_f16 v1, v1, v3
+; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v0, 0x34d1, v0 op_sel_hi:[0,1]
+; GFX1100-GISEL-NEXT:    v_pk_mul_f16 v1, 0x34d1, v1 op_sel_hi:[0,1]
 ; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log10_v4f16_fast:
diff --git a/llvm/test/CodeGen/AMDGPU/v_pack.ll b/llvm/test/CodeGen/AMDGPU/v_pack.ll
index a8cbb0000ce722b..bd68baae12cf81b 100644
--- a/llvm/test/CodeGen/AMDGPU/v_pack.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_pack.ll
@@ -31,9 +31,9 @@ define amdgpu_kernel void @v_pack_b32_v2f16(ptr addrspace(1) %in0, ptr addrspace
 ; GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GISEL-NEXT:    global_load_ushort v2, v0, s[2:3] glc dlc
 ; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    v_add_f16_e32 v0, 2.0, v1
-; GISEL-NEXT:    v_add_f16_e32 v1, 2.0, v2
-; GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v1
+; GISEL-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
+; GISEL-NEXT:    v_pk_add_f16 v0, v0, 2.0 op_sel_hi:[1,0]
 ; GISEL-NEXT:    ;;#ASMSTART
 ; GISEL-NEXT:    ; use v0
 ; GISEL-NEXT:    ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
index e46992ccbbc57d4..88b277365b74ade 100644
--- a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
@@ -50,8 +50,8 @@ define <2 x i16> @basic_smax_smin(i16 %src0, i16 %src1) {
 ; GISEL-VI:       ; %bb.0:
 ; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-VI-NEXT:    v_max_i16_e32 v0, 0, v0
-; GISEL-VI-NEXT:    v_mov_b32_e32 v2, 0xff
 ; GISEL-VI-NEXT:    v_max_i16_e32 v1, 0, v1
+; GISEL-VI-NEXT:    v_mov_b32_e32 v2, 0xff
 ; GISEL-VI-NEXT:    v_min_i16_e32 v0, 0xff, v0
 ; GISEL-VI-NEXT:    v_min_i16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GISEL-VI-NEXT:    v_or_b32_e32 v0, v0, v1
@@ -60,21 +60,22 @@ define <2 x i16> @basic_smax_smin(i16 %src0, i16 %src1) {
 ; GISEL-GFX9-LABEL: basic_smax_smin:
 ; GISEL-GFX9:       ; %bb.0:
 ; GISEL-GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX9-NEXT:    v_mov_b32_e32 v2, 0xff
-; GISEL-GFX9-NEXT:    v_med3_i16 v0, v0, 0, v2
-; GISEL-GFX9-NEXT:    v_med3_i16 v1, v1, 0, v2
 ; GISEL-GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GISEL-GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-GFX9-NEXT:    v_pk_max_i16 v0, v0, 0
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, 0xff00ff
+; GISEL-GFX9-NEXT:    v_pk_min_i16 v0, v0, v1
 ; GISEL-GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-GFX11-LABEL: basic_smax_smin:
 ; GISEL-GFX11:       ; %bb.0:
 ; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX11-NEXT:    v_med3_i16 v0, v0, 0, 0xff
-; GISEL-GFX11-NEXT:    v_med3_i16 v1, v1, 0, 0xff
-; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-GFX11-NEXT:    v_pk_max_i16 v0, v0, 0
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
 ; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
   %src0.max = call i16 @llvm.smax.i16(i16 %src0, i16 0)
   %src0.clamp = call i16 @llvm.smin.i16(i16 %src0.max, i16 255)
@@ -158,18 +159,20 @@ define amdgpu_kernel void @basic_smax_smin_sgpr(ptr addrspace(1) %out, i32 inreg
 ; GISEL-GFX9:       ; %bb.0:
 ; GISEL-GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GISEL-GFX9-NEXT:    s_sext_i32_i16 s4, 0
-; GISEL-GFX9-NEXT:    s_sext_i32_i16 s5, 0xff
 ; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX9-NEXT:    s_sext_i32_i16 s2, s2
-; GISEL-GFX9-NEXT:    s_sext_i32_i16 s3, s3
-; GISEL-GFX9-NEXT:    s_max_i32 s2, s2, s4
-; GISEL-GFX9-NEXT:    s_max_i32 s3, s3, s4
-; GISEL-GFX9-NEXT:    s_sext_i32_i16 s2, s2
-; GISEL-GFX9-NEXT:    s_sext_i32_i16 s3, s3
-; GISEL-GFX9-NEXT:    s_min_i32 s2, s2, s5
-; GISEL-GFX9-NEXT:    s_min_i32 s3, s3, s5
 ; GISEL-GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s3
+; GISEL-GFX9-NEXT:    s_sext_i32_i16 s3, s2
+; GISEL-GFX9-NEXT:    s_ashr_i32 s2, s2, 16
+; GISEL-GFX9-NEXT:    s_max_i32 s3, s3, s4
+; GISEL-GFX9-NEXT:    s_max_i32 s2, s2, 0
+; GISEL-GFX9-NEXT:    s_pack_ll_b32_b16 s2, s3, s2
+; GISEL-GFX9-NEXT:    s_sext_i32_i16 s3, s2
+; GISEL-GFX9-NEXT:    s_ashr_i32 s2, s2, 16
+; GISEL-GFX9-NEXT:    s_sext_i32_i16 s4, 0xff00ff
+; GISEL-GFX9-NEXT:    s_min_i32 s3, s3, s4
+; GISEL-GFX9-NEXT:    s_min_i32 s2, s2, 0xff
+; GISEL-GFX9-NEXT:    s_pack_ll_b32_b16 s2, s3, s2
 ; GISEL-GFX9-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX9-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX9-NEXT:    s_endpgm
@@ -177,20 +180,23 @@ define amdgpu_kernel void @basic_smax_smin_sgpr(ptr addrspace(1) %out, i32 inreg
 ; GISEL-GFX11-LABEL: basic_smax_smin_sgpr:
 ; GISEL-GFX11:       ; %bb.0:
 ; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
-; GISEL-GFX11-NEXT:    s_sext_i32_i16 s4, 0
-; GISEL-GFX11-NEXT:    s_sext_i32_i16 s5, 0xff
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-GFX11-NEXT:    s_sext_i32_i16 s2, s2
-; GISEL-GFX11-NEXT:    s_sext_i32_i16 s3, s3
-; GISEL-GFX11-NEXT:    s_max_i32 s2, s2, s4
-; GISEL-GFX11-NEXT:    s_max_i32 s3, s3, s4
-; GISEL-GFX11-NEXT:    s_sext_i32_i16 s2, s2
-; GISEL-GFX11-NEXT:    s_sext_i32_i16 s3, s3
-; GISEL-GFX11-NEXT:    s_min_i32 s2, s2, s5
-; GISEL-GFX11-NEXT:    s_min_i32 s3, s3, s5
-; GISEL-GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GISEL-GFX11-NEXT:    s_pack_ll_b32_b16 s2, s2, s3
+; GISEL-GFX11-NEXT:    s_sext_i32_i16 s3, 0
+; GISEL-GFX11-NEXT:    s_sext_i32_i16 s4, s2
+; GISEL-GFX11-NEXT:    s_ashr_i32 s2, s2, 16
+; GISEL-GFX11-NEXT:    s_max_i32 s3, s4, s3
+; GISEL-GFX11-NEXT:    s_max_i32 s2, s2, 0
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GISEL-GFX11-NEXT:    s_pack_ll_b32_b16 s2, s3, s2
+; GISEL-GFX11-NEXT:    s_sext_i32_i16 s3, 0xff00ff
+; GISEL-GFX11-NEXT:    s_sext_i32_i16 s4, s2
+; GISEL-GFX11-NEXT:    s_ashr_i32 s2, s2, 16
+; GISEL-GFX11-NEXT:    s_min_i32 s3, s4, s3
+; GISEL-GFX11-NEXT:    s_min_i32 s2, s2, 0xff
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GISEL-GFX11-NEXT:    s_pack_ll_b32_b16 s2, s3, s2
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
 ; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GISEL-GFX11-NEXT:    s_nop 0
@@ -253,21 +259,22 @@ define <2 x i16> @basic_smin_smax(i16 %src0, i16 %src1) {
 ; GISEL-GFX9-LABEL: basic_smin_smax:
 ; GISEL-GFX9:       ; %bb.0:
 ; GISEL-GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX9-NEXT:    v_mov_b32_e32 v2, 0xff
-; GISEL-GFX9-NEXT:    v_med3_i16 v0, v0, 0, v2
-; GISEL-GFX9-NEXT:    v_med3_i16 v1, v1, 0, v2
 ; GISEL-GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GISEL-GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, 0xff00ff
+; GISEL-GFX9-NEXT:    v_pk_min_i16 v0, v0, v1
+; GISEL-GFX9-NEXT:    v_pk_max_i16 v0, v0, 0
 ; GISEL-GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-GFX11-LABEL: basic_smin_smax:
 ; GISEL-GFX11:       ; %bb.0:
 ; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX11-NEXT:    v_med3_i16 v0, v0, 0, 0xff
-; GISEL-GFX11-NEXT:    v_med3_i16 v1, v1, 0, 0xff
-; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GISEL-GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-GFX11-NEXT:    v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_pk_max_i16 v0, v0, 0
 ; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
   %src0.min = call i16 @llvm.smin.i16(i16 %src0, i16 255)
   %src0.clamp = call i16 @llvm.smax.i16(i16 %src0.min, i16 0)



More information about the llvm-commits mailing list