[llvm] [GlobalISel] Support vector G_UNMERGE_VALUES in computeKnownBits. (PR #112172)

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 14 02:01:43 PDT 2024


https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/112172

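This adds computeKnownBits support for vector->vector G_UNMERGE_VALUES. The known bits of the source vector are computed with a DemandedElts mask that is zero-extended to the source element count and shifted to the lanes covered by the queried result. Unmerges whose source scalar type differs from the result scalar type are still not handled.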

From b360a7198934979460c9e4d9134620471bea7a0d Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 14 Oct 2024 10:00:00 +0100
Subject: [PATCH] [GlobalISel] Support vector G_UNMERGE_VALUES in
 computeKnownBits.

This adds computeKnownBits support for vector->vector G_UNMERGE_VALUES,
grabbing the known bits with an adjusted DemandedElts mask.
---
 .../lib/CodeGen/GlobalISel/GISelKnownBits.cpp |  24 +-
 .../CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll     | 252 +++++----
 .../CodeGen/AMDGPU/GlobalISel/srem.i64.ll     | 500 +++++++++---------
 .../GlobalISel/KnownBitsVectorTest.cpp        |  46 ++
 4 files changed, 436 insertions(+), 386 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 52f5d408c8eddd..a7aebfbb285a74 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -514,15 +514,12 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
     break;
   }
   case TargetOpcode::G_UNMERGE_VALUES: {
-    if (DstTy.isVector())
-      break;
     unsigned NumOps = MI.getNumOperands();
     Register SrcReg = MI.getOperand(NumOps - 1).getReg();
-    if (MRI.getType(SrcReg).isVector())
-      return; // TODO: Handle vectors.
+    LLT SrcTy = MRI.getType(SrcReg);
 
-    KnownBits SrcOpKnown;
-    computeKnownBitsImpl(SrcReg, SrcOpKnown, DemandedElts, Depth + 1);
+    if (SrcTy.isVector() && SrcTy.getScalarType() != DstTy.getScalarType())
+      return; // TODO: Handle vector->subelement unmerges?
 
     // Figure out the result operand index
     unsigned DstIdx = 0;
@@ -530,7 +527,20 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
          ++DstIdx)
       ;
 
-    Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
+    APInt SubDemandedElts = DemandedElts;
+    if (SrcTy.isVector()) {
+      unsigned DstLanes = DstTy.isVector() ? DstTy.getNumElements() : 1;
+      SubDemandedElts =
+          DemandedElts.zext(SrcTy.getNumElements()).shl(DstIdx * DstLanes);
+    }
+
+    KnownBits SrcOpKnown;
+    computeKnownBitsImpl(SrcReg, SrcOpKnown, SubDemandedElts, Depth + 1);
+
+    if (SrcTy.isVector())
+      Known = SrcOpKnown;
+    else
+      Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
     break;
   }
   case TargetOpcode::G_BSWAP: {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 81abe91b283f96..0b5706aa45b693 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -1184,73 +1184,74 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; GISEL-NEXT:    v_trunc_f32_e32 v7, v5
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v4
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], s6, v6, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v5, v7, v4
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
-; GISEL-NEXT:    v_mul_hi_u32 v9, v6, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v7, v4
-; GISEL-NEXT:    v_mul_lo_u32 v10, v6, v8
-; GISEL-NEXT:    v_mul_lo_u32 v11, v7, v8
-; GISEL-NEXT:    v_mul_hi_u32 v12, v6, v8
+; GISEL-NEXT:    v_trunc_f32_e32 v6, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v6
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v5, 0
+; GISEL-NEXT:    v_mov_b32_e32 v4, v9
+; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v4, v7, v8
+; GISEL-NEXT:    v_mul_hi_u32 v6, v5, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s7, v5, v[9:10]
 ; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v9
+; GISEL-NEXT:    v_mul_lo_u32 v11, v7, v9
+; GISEL-NEXT:    v_mul_hi_u32 v12, v5, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v11, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v12
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v6, v4
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v5, v4
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
-; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v7, v5, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, v7, v6, vcc
 ; GISEL-NEXT:    v_mov_b32_e32 v4, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s6, v5, v[4:5]
-; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s6, v12, v[4:5]
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
 ; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
-; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v10, v0, v4
-; GISEL-NEXT:    v_mul_lo_u32 v0, v5, v8
-; GISEL-NEXT:    v_mul_lo_u32 v12, v11, v9
-; GISEL-NEXT:    v_xor_b32_e32 v13, v1, v4
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v10, v0, v6
+; GISEL-NEXT:    v_mul_lo_u32 v0, v12, v8
+; GISEL-NEXT:    v_mul_lo_u32 v4, v11, v9
+; GISEL-NEXT:    v_xor_b32_e32 v13, v1, v6
 ; GISEL-NEXT:    v_mul_hi_u32 v1, v11, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v5, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v8, v12, v8
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v12, v0
-; GISEL-NEXT:    v_mul_hi_u32 v12, v11, v9
+; GISEL-NEXT:    v_mul_lo_u32 v1, v12, v9
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
+; GISEL-NEXT:    v_mul_hi_u32 v4, v11, v9
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
+; GISEL-NEXT:    v_mul_hi_u32 v8, v12, v9
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v5, v1, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v12, v1, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v8, v13, v0
 ; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v1
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
+; GISEL-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
@@ -1265,40 +1266,39 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
 ; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v0, v8
 ; GISEL-NEXT:    v_mul_hi_u32 v12, v13, v1
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v11, 0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
 ; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v5, v12, v[1:2]
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v4, v12, v[1:2]
 ; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
-; GISEL-NEXT:    s_sub_u32 s6, 0, 0x12d8fb
-; GISEL-NEXT:    s_subb_u32 s7, 0, 0
 ; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], v13, v8, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], v13, v8
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v1
 ; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v8, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v0, v5
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v0, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, -1, v9, s[4:5]
 ; GISEL-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v1, vcc
+; GISEL-NEXT:    s_sub_u32 s6, 0, 0x12d8fb
 ; GISEL-NEXT:    v_add_i32_e32 v13, vcc, 1, v11
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
 ; GISEL-NEXT:    v_addc_u32_e32 v14, vcc, 0, v12, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
 ; GISEL-NEXT:    v_cndmask_b32_e32 v15, -1, v8, vcc
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
+; GISEL-NEXT:    s_subb_u32 s7, 0, 0
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v13
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v5, v[8:9]
 ; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, 0, v14, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v15
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, v13, v1, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v1, v7, v0
-; GISEL-NEXT:    v_mul_lo_u32 v13, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v0
+; GISEL-NEXT:    v_mul_lo_u32 v13, v5, v8
+; GISEL-NEXT:    v_mul_hi_u32 v15, v5, v0
 ; GISEL-NEXT:    v_cndmask_b32_e32 v14, v14, v16, vcc
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v7, v0
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
@@ -1307,7 +1307,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v8
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT:    v_mul_hi_u32 v13, v6, v8
+; GISEL-NEXT:    v_mul_hi_u32 v13, v5, v8
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
@@ -1318,95 +1318,93 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v6, v0
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v0
 ; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v7, v1, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v11, v9, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT:    v_xor_b32_e32 v1, v9, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v7, v11, v9, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v10, v7, v6
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], s6, v13, v[1:2]
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, v12, v14, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v1, v9, v6
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], s7, v5, v[7:8]
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e32 v10, v12, v14, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
 ; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v11, v2, v9
+; GISEL-NEXT:    v_xor_b32_e32 v8, v2, v9
 ; GISEL-NEXT:    v_mul_lo_u32 v2, v13, v0
-; GISEL-NEXT:    v_mul_lo_u32 v7, v8, v6
+; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v7
 ; GISEL-NEXT:    v_xor_b32_e32 v12, v3, v9
-; GISEL-NEXT:    v_mul_hi_u32 v3, v8, v0
+; GISEL-NEXT:    v_mul_hi_u32 v3, v5, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v13, v6
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
-; GISEL-NEXT:    v_mul_hi_u32 v7, v8, v6
+; GISEL-NEXT:    v_mul_lo_u32 v3, v13, v7
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v11, v2
+; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v7
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_mul_hi_u32 v6, v13, v6
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v11
+; GISEL-NEXT:    v_mul_hi_u32 v7, v13, v7
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v5, v0
 ; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v13, v2, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v12, v0
-; GISEL-NEXT:    v_mul_lo_u32 v6, v11, v2
-; GISEL-NEXT:    v_mul_hi_u32 v7, v11, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v12, v0
-; GISEL-NEXT:    v_xor_b32_e32 v8, v10, v4
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
+; GISEL-NEXT:    v_mul_lo_u32 v5, v12, v3
+; GISEL-NEXT:    v_mul_lo_u32 v7, v8, v2
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v10, v6
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v6, v8, v3
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v6, v12, v2
+; GISEL-NEXT:    v_mul_hi_u32 v3, v12, v3
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_mul_hi_u32 v7, v8, v2
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v7, v12, v2
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
-; GISEL-NEXT:    v_mul_hi_u32 v6, v11, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v0, v3
-; GISEL-NEXT:    v_mul_hi_u32 v7, v12, v2
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v7, v0
-; GISEL-NEXT:    v_mov_b32_e32 v0, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v5, v13, v[0:1]
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
-; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v8, v4, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v11, v2
-; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v12, v3
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v5
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v3, v5
+; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v2
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v7, 0
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v5
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v4, v10, v[3:4]
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v8, v2
+; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v12, v5
+; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, -1, v6, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v10
-; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v13, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v5
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
+; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v3, -1, v6, s[4:5]
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v7
+; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, 0, v10, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
 ; GISEL-NEXT:    v_cndmask_b32_e32 v2, -1, v2, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v6
-; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v7, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v6
+; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v8, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v6, v3, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v7, v5, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v13, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v6, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v8, v5, vcc
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v10, v4, vcc
 ; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v9
 ; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v9
 ; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index cfac0c2fa56aaf..3ed864d463ee9c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -1112,73 +1112,74 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; GISEL-NEXT:    v_trunc_f32_e32 v7, v5
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v4
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], s6, v6, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v5, v7, v4
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
-; GISEL-NEXT:    v_mul_hi_u32 v9, v6, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v7, v4
-; GISEL-NEXT:    v_mul_lo_u32 v10, v6, v8
-; GISEL-NEXT:    v_mul_lo_u32 v11, v7, v8
-; GISEL-NEXT:    v_mul_hi_u32 v12, v6, v8
+; GISEL-NEXT:    v_trunc_f32_e32 v6, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v6
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v5, 0
+; GISEL-NEXT:    v_mov_b32_e32 v4, v9
+; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v4, v7, v8
+; GISEL-NEXT:    v_mul_hi_u32 v6, v5, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s7, v5, v[9:10]
 ; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v9
+; GISEL-NEXT:    v_mul_lo_u32 v11, v7, v9
+; GISEL-NEXT:    v_mul_hi_u32 v12, v5, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v11, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v12
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v6, v4
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v5, v4
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
-; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v7, v5, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, v7, v6, vcc
 ; GISEL-NEXT:    v_mov_b32_e32 v4, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s6, v5, v[4:5]
-; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s6, v12, v[4:5]
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
 ; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
-; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v10, v0, v4
-; GISEL-NEXT:    v_mul_lo_u32 v0, v5, v8
-; GISEL-NEXT:    v_mul_lo_u32 v12, v11, v9
-; GISEL-NEXT:    v_xor_b32_e32 v13, v1, v4
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v10, v0, v6
+; GISEL-NEXT:    v_mul_lo_u32 v0, v12, v8
+; GISEL-NEXT:    v_mul_lo_u32 v4, v11, v9
+; GISEL-NEXT:    v_xor_b32_e32 v13, v1, v6
 ; GISEL-NEXT:    v_mul_hi_u32 v1, v11, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v5, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v8, v12, v8
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v12, v0
-; GISEL-NEXT:    v_mul_hi_u32 v12, v11, v9
+; GISEL-NEXT:    v_mul_lo_u32 v1, v12, v9
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
+; GISEL-NEXT:    v_mul_hi_u32 v4, v11, v9
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
+; GISEL-NEXT:    v_mul_hi_u32 v8, v12, v9
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v5, v1, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v12, v1, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v8, v13, v0
 ; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v1
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT:    v_mov_b32_e32 v5, 0x1000
+; GISEL-NEXT:    v_mov_b32_e32 v4, 0x1000
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
@@ -1191,40 +1192,39 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v0, v8
-; GISEL-NEXT:    v_mul_hi_u32 v12, v13, v1
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT:    v_mul_hi_u32 v11, v13, v1
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v5, v8, v[1:2]
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v4, v8, v[1:2]
 ; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, v10, v0
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
-; GISEL-NEXT:    s_sub_u32 s6, 0, 0x1000
-; GISEL-NEXT:    s_subb_u32 s7, 0, 0
 ; GISEL-NEXT:    v_subb_u32_e64 v11, s[4:5], v13, v8, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v13, v8
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v4
 ; GISEL-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, v10, v5
+; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, v10, v4
+; GISEL-NEXT:    s_sub_u32 s6, 0, 0x1000
 ; GISEL-NEXT:    v_cndmask_b32_e64 v12, -1, v1, s[4:5]
 ; GISEL-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v0, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v13, v5
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v13, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v14
 ; GISEL-NEXT:    v_cndmask_b32_e32 v15, -1, v8, vcc
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v13, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
+; GISEL-NEXT:    s_subb_u32 s7, 0, 0
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v13, v4
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v5, v[8:9]
 ; GISEL-NEXT:    v_subbrev_u32_e32 v16, vcc, 0, v14, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v15
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, v13, v1, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v1, v7, v0
-; GISEL-NEXT:    v_mul_lo_u32 v13, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v0
+; GISEL-NEXT:    v_mul_lo_u32 v13, v5, v8
+; GISEL-NEXT:    v_mul_hi_u32 v15, v5, v0
 ; GISEL-NEXT:    v_cndmask_b32_e32 v14, v14, v16, vcc
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v7, v0
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
@@ -1233,7 +1233,7 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v8
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT:    v_mul_hi_u32 v13, v6, v8
+; GISEL-NEXT:    v_mul_hi_u32 v13, v5, v8
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
@@ -1244,93 +1244,91 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v6, v0
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v0
 ; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v7, v1, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v10, v9, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT:    v_xor_b32_e32 v1, v9, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v7, v10, v9, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v10, v7, v6
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], s6, v13, v[1:2]
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, v11, v14, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v1, v9, v6
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], s7, v5, v[7:8]
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e32 v10, v11, v14, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
 ; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v11, v2, v9
+; GISEL-NEXT:    v_xor_b32_e32 v8, v2, v9
 ; GISEL-NEXT:    v_mul_lo_u32 v2, v13, v0
-; GISEL-NEXT:    v_mul_lo_u32 v7, v8, v6
+; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v7
 ; GISEL-NEXT:    v_xor_b32_e32 v12, v3, v9
-; GISEL-NEXT:    v_mul_hi_u32 v3, v8, v0
+; GISEL-NEXT:    v_mul_hi_u32 v3, v5, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v13, v6
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
-; GISEL-NEXT:    v_mul_hi_u32 v7, v8, v6
+; GISEL-NEXT:    v_mul_lo_u32 v3, v13, v7
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v11, v2
+; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v7
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_mul_hi_u32 v6, v13, v6
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v11
+; GISEL-NEXT:    v_mul_hi_u32 v7, v13, v7
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v5, v0
 ; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v13, v2, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v12, v0
-; GISEL-NEXT:    v_mul_lo_u32 v6, v11, v2
-; GISEL-NEXT:    v_mul_hi_u32 v7, v11, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v12, v0
-; GISEL-NEXT:    v_xor_b32_e32 v8, v10, v4
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
+; GISEL-NEXT:    v_mul_lo_u32 v5, v12, v3
+; GISEL-NEXT:    v_mul_lo_u32 v7, v8, v2
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v10, v6
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v6, v8, v3
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v6, v12, v2
+; GISEL-NEXT:    v_mul_hi_u32 v3, v12, v3
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_mul_hi_u32 v7, v8, v2
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v7, v12, v2
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
-; GISEL-NEXT:    v_mul_hi_u32 v6, v11, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v0, v3
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
 ; GISEL-NEXT:    v_mul_hi_u32 v7, v12, v2
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v0
-; GISEL-NEXT:    v_mov_b32_e32 v0, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[0:1]
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
-; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v8, v4, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v11, v2
-; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v12, v3
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v2, v5
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v5
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4]
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v8, v2
+; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v12, v5
+; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v2, v4
+; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v5
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
 ; GISEL-NEXT:    v_cndmask_b32_e32 v8, -1, v8, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v7, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
-; GISEL-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
+; GISEL-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v5, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
-; GISEL-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v10, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
 ; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v9
 ; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v9
 ; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
@@ -1707,73 +1705,74 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; GISEL-NEXT:    v_trunc_f32_e32 v7, v5
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v7
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v4
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], s6, v6, 0
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[5:6]
-; GISEL-NEXT:    v_mul_lo_u32 v5, v7, v4
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
-; GISEL-NEXT:    v_mul_hi_u32 v9, v6, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v7, v4
-; GISEL-NEXT:    v_mul_lo_u32 v10, v6, v8
-; GISEL-NEXT:    v_mul_lo_u32 v11, v7, v8
-; GISEL-NEXT:    v_mul_hi_u32 v12, v6, v8
+; GISEL-NEXT:    v_trunc_f32_e32 v6, v5
+; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v6
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v5, 0
+; GISEL-NEXT:    v_mov_b32_e32 v4, v9
+; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[4:5]
+; GISEL-NEXT:    v_mul_lo_u32 v4, v7, v8
+; GISEL-NEXT:    v_mul_hi_u32 v6, v5, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s7, v5, v[9:10]
 ; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
+; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v9
+; GISEL-NEXT:    v_mul_lo_u32 v11, v7, v9
+; GISEL-NEXT:    v_mul_hi_u32 v12, v5, v9
+; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v11, v8
+; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v12
 ; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v6, v4
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
+; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v5, v4
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
-; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v7, v5, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, v7, v6, vcc
 ; GISEL-NEXT:    v_mov_b32_e32 v4, v9
-; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s6, v5, v[4:5]
-; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s6, v12, v[4:5]
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
 ; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
-; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v10, v0, v4
-; GISEL-NEXT:    v_mul_lo_u32 v0, v5, v8
-; GISEL-NEXT:    v_mul_lo_u32 v12, v11, v9
-; GISEL-NEXT:    v_xor_b32_e32 v13, v1, v4
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v10, v0, v6
+; GISEL-NEXT:    v_mul_lo_u32 v0, v12, v8
+; GISEL-NEXT:    v_mul_lo_u32 v4, v11, v9
+; GISEL-NEXT:    v_xor_b32_e32 v13, v1, v6
 ; GISEL-NEXT:    v_mul_hi_u32 v1, v11, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v5, v8
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v8, v12, v8
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v1, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v12, v0
-; GISEL-NEXT:    v_mul_hi_u32 v12, v11, v9
+; GISEL-NEXT:    v_mul_lo_u32 v1, v12, v9
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
+; GISEL-NEXT:    v_mul_hi_u32 v4, v11, v9
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
+; GISEL-NEXT:    v_mul_hi_u32 v8, v12, v9
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v5, v1, vcc
+; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v12, v1, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v8, v13, v0
 ; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v1
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
+; GISEL-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
@@ -1786,40 +1785,39 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v0, v8
-; GISEL-NEXT:    v_mul_hi_u32 v12, v13, v1
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT:    v_mul_hi_u32 v11, v13, v1
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v5, v8, v[1:2]
+; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v4, v8, v[1:2]
 ; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, v10, v0
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
-; GISEL-NEXT:    s_sub_u32 s6, 0, 0x12d8fb
-; GISEL-NEXT:    s_subb_u32 s7, 0, 0
 ; GISEL-NEXT:    v_subb_u32_e64 v11, s[4:5], v13, v8, vcc
 ; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v13, v8
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v4
 ; GISEL-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, v10, v5
+; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, v10, v4
+; GISEL-NEXT:    s_sub_u32 s6, 0, 0x12d8fb
 ; GISEL-NEXT:    v_cndmask_b32_e64 v12, -1, v1, s[4:5]
 ; GISEL-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v0, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v13, v5
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v13, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v14
 ; GISEL-NEXT:    v_cndmask_b32_e32 v15, -1, v8, vcc
 ; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v13, v5
-; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
+; GISEL-NEXT:    s_subb_u32 s7, 0, 0
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v13, v4
+; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], s7, v5, v[8:9]
 ; GISEL-NEXT:    v_subbrev_u32_e32 v16, vcc, 0, v14, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v15
 ; GISEL-NEXT:    v_cndmask_b32_e32 v9, v13, v1, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v1, v7, v0
-; GISEL-NEXT:    v_mul_lo_u32 v13, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v0
+; GISEL-NEXT:    v_mul_lo_u32 v13, v5, v8
+; GISEL-NEXT:    v_mul_hi_u32 v15, v5, v0
 ; GISEL-NEXT:    v_cndmask_b32_e32 v14, v14, v16, vcc
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v7, v0
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
@@ -1828,7 +1826,7 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v8
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT:    v_mul_hi_u32 v13, v6, v8
+; GISEL-NEXT:    v_mul_hi_u32 v13, v5, v8
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
@@ -1839,93 +1837,91 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v6, v0
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v0
 ; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v7, v1, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
+; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v10, v9, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT:    v_xor_b32_e32 v1, v9, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v7, v10, v9, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v10, v7, v6
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], s6, v13, v[1:2]
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, v11, v14, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v1, v9, v6
+; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], s7, v5, v[7:8]
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
-; GISEL-NEXT:    v_cndmask_b32_e32 v10, v11, v14, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
 ; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v11, v2, v9
+; GISEL-NEXT:    v_xor_b32_e32 v8, v2, v9
 ; GISEL-NEXT:    v_mul_lo_u32 v2, v13, v0
-; GISEL-NEXT:    v_mul_lo_u32 v7, v8, v6
+; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v7
 ; GISEL-NEXT:    v_xor_b32_e32 v12, v3, v9
-; GISEL-NEXT:    v_mul_hi_u32 v3, v8, v0
+; GISEL-NEXT:    v_mul_hi_u32 v3, v5, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v13, v6
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
-; GISEL-NEXT:    v_mul_hi_u32 v7, v8, v6
+; GISEL-NEXT:    v_mul_lo_u32 v3, v13, v7
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v11, v2
+; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v7
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_mul_hi_u32 v6, v13, v6
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v11
+; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v11
+; GISEL-NEXT:    v_mul_hi_u32 v7, v13, v7
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
 ; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v5, v0
 ; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v13, v2, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v3, v12, v0
-; GISEL-NEXT:    v_mul_lo_u32 v6, v11, v2
-; GISEL-NEXT:    v_mul_hi_u32 v7, v11, v0
-; GISEL-NEXT:    v_mul_hi_u32 v0, v12, v0
-; GISEL-NEXT:    v_xor_b32_e32 v8, v10, v4
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
+; GISEL-NEXT:    v_mul_lo_u32 v5, v12, v3
+; GISEL-NEXT:    v_mul_lo_u32 v7, v8, v2
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v10, v6
+; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v6, v8, v3
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT:    v_mul_lo_u32 v6, v12, v2
+; GISEL-NEXT:    v_mul_hi_u32 v3, v12, v3
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_mul_hi_u32 v7, v8, v2
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v7, v12, v2
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
-; GISEL-NEXT:    v_mul_hi_u32 v6, v11, v2
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v0, v3
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
 ; GISEL-NEXT:    v_mul_hi_u32 v7, v12, v2
-; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v0
-; GISEL-NEXT:    v_mov_b32_e32 v0, v3
-; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[0:1]
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
-; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v8, v4, vcc
-; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v11, v2
-; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v12, v3
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v2, v5
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v5
+; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4]
+; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v8, v2
+; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v12, v5
+; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v2, v4
+; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v5
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
 ; GISEL-NEXT:    v_cndmask_b32_e32 v8, -1, v8, vcc
-; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v7, v4
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
-; GISEL-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
+; GISEL-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v5, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
-; GISEL-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v10, vcc
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
 ; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v9
 ; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v9
 ; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
diff --git a/llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp b/llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp
index dd6edd35a8468b..dada571564aefc 100644
--- a/llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp
@@ -1548,3 +1548,49 @@ TEST_F(AArch64GISelMITest, TestNumSignBitsUAddoOverflow) {
   // Assert sign-extension from vector boolean
   EXPECT_EQ(32u, Info.computeNumSignBits(CopyOverflow));
 }
+
+TEST_F(AArch64GISelMITest, TestKnownBitsUnmergeVectorScalar) {
+  StringRef MIRString = R"(
+   %copy_x0:_(<2 x s16>) = COPY $w0
+   %maskf:_(s16) = G_CONSTANT i16 15
+   %x0_x1:_(<2 x s16>) = G_BUILD_VECTOR %maskf, %maskf
+   %and:_(<2 x s16>) = G_AND %copy_x0, %x0_x1
+   %x0_0:_(s16), %x0_1:_(s16) = G_UNMERGE_VALUES %and
+   %result:_(s16) = COPY %x0_0
+)";
+
+  setUp(MIRString);
+  if (!TM)
+    GTEST_SKIP();
+
+  Register CopyOverflow = Copies[Copies.size() - 1];
+
+  GISelKnownBits Info(*MF);
+
+  EXPECT_EQ(0xFFF0u, Info.getKnownBits(CopyOverflow).Zero.getZExtValue());
+}
+
+TEST_F(AArch64GISelMITest, TestKnownBitsUnmergeVectorVector) {
+  StringRef MIRString = R"(
+   %copy_x0:_(<4 x s8>) = COPY $w0
+   %maskff:_(s8) = G_CONSTANT i8 255
+   %maskf:_(s8) = G_CONSTANT i8 15
+   %x0_x1:_(<4 x s8>) = G_BUILD_VECTOR %maskf, %maskf, %maskff, %maskff
+   %and:_(<4 x s8>) = G_AND %copy_x0, %x0_x1
+   %x0_0:_(<2 x s8>), %x0_1:_(<2 x s8>) = G_UNMERGE_VALUES %and
+   %result1:_(<2 x s8>) = COPY %x0_0
+   %result2:_(<2 x s8>) = COPY %x0_1
+)";
+
+  setUp(MIRString);
+  if (!TM)
+    GTEST_SKIP();
+
+
+  GISelKnownBits Info(*MF);
+
+  Register CopyOverflow1 = Copies[Copies.size() - 2];
+  EXPECT_EQ(0xF0u, Info.getKnownBits(CopyOverflow1).Zero.getZExtValue());
+  Register CopyOverflow2 = Copies[Copies.size() - 1];
+  EXPECT_EQ(0x00u, Info.getKnownBits(CopyOverflow2).Zero.getZExtValue());
+}



More information about the llvm-commits mailing list