[llvm] [GlobalISel] Support vector G_UNMERGE_VALUES in computeKnownBits. (PR #112172)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 14 02:01:43 PDT 2024
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/112172
This adds computeKnownBits support for vector->vector G_UNMERGE_VALUES, grabbing the known bits with an adjusted DemandedElts mask.
From b360a7198934979460c9e4d9134620471bea7a0d Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 14 Oct 2024 10:00:00 +0100
Subject: [PATCH] [GlobalISel] Support vector G_UNMERGE_VALUES in
computeKnownBits.
This adds computeKnownBits support for vector->vector G_UNMERGE_VALUES,
grabbing the known bits with an adjusted DemandedElts mask.
---
.../lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 24 +-
.../CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll | 252 +++++----
.../CodeGen/AMDGPU/GlobalISel/srem.i64.ll | 500 +++++++++---------
.../GlobalISel/KnownBitsVectorTest.cpp | 46 ++
4 files changed, 436 insertions(+), 386 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 52f5d408c8eddd..a7aebfbb285a74 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -514,15 +514,12 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
break;
}
case TargetOpcode::G_UNMERGE_VALUES: {
- if (DstTy.isVector())
- break;
unsigned NumOps = MI.getNumOperands();
Register SrcReg = MI.getOperand(NumOps - 1).getReg();
- if (MRI.getType(SrcReg).isVector())
- return; // TODO: Handle vectors.
+ LLT SrcTy = MRI.getType(SrcReg);
- KnownBits SrcOpKnown;
- computeKnownBitsImpl(SrcReg, SrcOpKnown, DemandedElts, Depth + 1);
+ if (SrcTy.isVector() && SrcTy.getScalarType() != DstTy.getScalarType())
+ return; // TODO: Handle vector->subelement unmerges?
// Figure out the result operand index
unsigned DstIdx = 0;
@@ -530,7 +527,20 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
++DstIdx)
;
- Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
+ APInt SubDemandedElts = DemandedElts;
+ if (SrcTy.isVector()) {
+ unsigned DstLanes = DstTy.isVector() ? DstTy.getNumElements() : 1;
+ SubDemandedElts =
+ DemandedElts.zext(SrcTy.getNumElements()).shl(DstIdx * DstLanes);
+ }
+
+ KnownBits SrcOpKnown;
+ computeKnownBitsImpl(SrcReg, SrcOpKnown, SubDemandedElts, Depth + 1);
+
+ if (SrcTy.isVector())
+ Known = SrcOpKnown;
+ else
+ Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
break;
}
case TargetOpcode::G_BSWAP: {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 81abe91b283f96..0b5706aa45b693 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -1184,73 +1184,74 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
-; GISEL-NEXT: v_trunc_f32_e32 v7, v5
-; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7
-; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v4
-; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s6, v6, 0
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[5:6]
-; GISEL-NEXT: v_mul_lo_u32 v5, v7, v4
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
-; GISEL-NEXT: v_mul_hi_u32 v9, v6, v4
-; GISEL-NEXT: v_mul_hi_u32 v4, v7, v4
-; GISEL-NEXT: v_mul_lo_u32 v10, v6, v8
-; GISEL-NEXT: v_mul_lo_u32 v11, v7, v8
-; GISEL-NEXT: v_mul_hi_u32 v12, v6, v8
+; GISEL-NEXT: v_trunc_f32_e32 v6, v5
+; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6
+; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v4
+; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v6
+; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v5, 0
+; GISEL-NEXT: v_mov_b32_e32 v4, v9
+; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[4:5]
+; GISEL-NEXT: v_mul_lo_u32 v4, v7, v8
+; GISEL-NEXT: v_mul_hi_u32 v6, v5, v8
+; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s7, v5, v[9:10]
; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
+; GISEL-NEXT: v_mul_lo_u32 v10, v5, v9
+; GISEL-NEXT: v_mul_lo_u32 v11, v7, v9
+; GISEL-NEXT: v_mul_hi_u32 v12, v5, v9
+; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v10, v4
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v6, v4
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v5, v4
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
-; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v7, v5, vcc
+; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v7, v6, vcc
; GISEL-NEXT: v_mov_b32_e32 v4, v9
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v5, v[4:5]
-; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v12, v[4:5]
+; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
-; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; GISEL-NEXT: v_xor_b32_e32 v10, v0, v4
-; GISEL-NEXT: v_mul_lo_u32 v0, v5, v8
-; GISEL-NEXT: v_mul_lo_u32 v12, v11, v9
-; GISEL-NEXT: v_xor_b32_e32 v13, v1, v4
+; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT: v_xor_b32_e32 v10, v0, v6
+; GISEL-NEXT: v_mul_lo_u32 v0, v12, v8
+; GISEL-NEXT: v_mul_lo_u32 v4, v11, v9
+; GISEL-NEXT: v_xor_b32_e32 v13, v1, v6
; GISEL-NEXT: v_mul_hi_u32 v1, v11, v8
-; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_mul_hi_u32 v8, v12, v8
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v1, v5, v9
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0
-; GISEL-NEXT: v_mul_hi_u32 v12, v11, v9
+; GISEL-NEXT: v_mul_lo_u32 v1, v12, v9
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
+; GISEL-NEXT: v_mul_hi_u32 v4, v11, v9
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT: v_mul_hi_u32 v9, v5, v9
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v8, v4
+; GISEL-NEXT: v_mul_hi_u32 v8, v12, v9
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc
+; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v12, v1, vcc
; GISEL-NEXT: v_mul_lo_u32 v8, v13, v0
; GISEL-NEXT: v_mul_lo_u32 v9, v10, v1
; GISEL-NEXT: v_mul_hi_u32 v11, v10, v0
; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT: v_mov_b32_e32 v5, 0x12d8fb
+; GISEL-NEXT: v_mov_b32_e32 v4, 0x12d8fb
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
@@ -1265,40 +1266,39 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v8
; GISEL-NEXT: v_mul_hi_u32 v12, v13, v1
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v11, 0
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v8
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v5, v12, v[1:2]
+; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v4, v12, v[1:2]
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v0
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
-; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb
-; GISEL-NEXT: s_subb_u32 s7, 0, 0
; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v13, v8, vcc
; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v13, v8
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v8, vcc
-; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v5
+; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v4
; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v9, s[4:5]
; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v1, vcc
+; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb
; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v11
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
; GISEL-NEXT: v_addc_u32_e32 v14, vcc, 0, v12, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v8, vcc
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
+; GISEL-NEXT: s_subb_u32 s7, 0, 0
; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v13
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
+; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v5, v[8:9]
; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v14, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15
; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v1, vcc
; GISEL-NEXT: v_mul_lo_u32 v1, v7, v0
-; GISEL-NEXT: v_mul_lo_u32 v13, v6, v8
-; GISEL-NEXT: v_mul_hi_u32 v15, v6, v0
+; GISEL-NEXT: v_mul_lo_u32 v13, v5, v8
+; GISEL-NEXT: v_mul_hi_u32 v15, v5, v0
; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc
; GISEL-NEXT: v_mul_hi_u32 v0, v7, v0
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13
@@ -1307,7 +1307,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-NEXT: v_mul_lo_u32 v15, v7, v8
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT: v_mul_hi_u32 v13, v6, v8
+; GISEL-NEXT: v_mul_hi_u32 v13, v5, v8
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13
@@ -1318,95 +1318,93 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v6, v0
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v0
; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v7, v1, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
-; GISEL-NEXT: v_cndmask_b32_e32 v9, v11, v9, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT: v_xor_b32_e32 v1, v9, v4
+; GISEL-NEXT: v_cndmask_b32_e32 v7, v11, v9, vcc
+; GISEL-NEXT: v_xor_b32_e32 v10, v7, v6
+; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v13, v[1:2]
+; GISEL-NEXT: v_cndmask_b32_e32 v9, v12, v14, vcc
+; GISEL-NEXT: v_xor_b32_e32 v1, v9, v6
+; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s7, v5, v[7:8]
; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v12, v14, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9
; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT: v_xor_b32_e32 v11, v2, v9
+; GISEL-NEXT: v_xor_b32_e32 v8, v2, v9
; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0
-; GISEL-NEXT: v_mul_lo_u32 v7, v8, v6
+; GISEL-NEXT: v_mul_lo_u32 v11, v5, v7
; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9
-; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0
+; GISEL-NEXT: v_mul_hi_u32 v3, v5, v0
; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v8, v6
+; GISEL-NEXT: v_mul_lo_u32 v3, v13, v7
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v11, v2
+; GISEL-NEXT: v_mul_hi_u32 v11, v5, v7
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v11
+; GISEL-NEXT: v_mul_hi_u32 v7, v13, v7
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v0
; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0
-; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0
-; GISEL-NEXT: v_xor_b32_e32 v8, v10, v4
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
+; GISEL-NEXT: v_mul_lo_u32 v5, v12, v3
+; GISEL-NEXT: v_mul_lo_u32 v7, v8, v2
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v6
+; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT: v_mul_hi_u32 v6, v8, v3
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v6, v12, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v12, v3
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT: v_mul_hi_u32 v7, v8, v2
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v7, v12, v2
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3
-; GISEL-NEXT: v_mul_hi_u32 v6, v11, v2
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v3
-; GISEL-NEXT: v_mul_hi_u32 v7, v12, v2
-; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0
-; GISEL-NEXT: v_add_i32_e32 v13, vcc, v7, v0
-; GISEL-NEXT: v_mov_b32_e32 v0, v3
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v13, v[0:1]
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4
-; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2
-; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
-; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3
-; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; GISEL-NEXT: v_add_i32_e32 v7, vcc, v3, v5
+; GISEL-NEXT: v_mul_hi_u32 v10, v12, v2
+; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, 0
+; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
+; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v5
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v10, v[3:4]
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
+; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
+; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5
+; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
-; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v4, -1, v6, s[4:5]
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v10
-; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v13, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
+; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v3, -1, v6, s[4:5]
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v7
+; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v10, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
; GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v6
-; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v6
+; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v8, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v3, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v8, v5, vcc
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v4, vcc
; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9
; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index cfac0c2fa56aaf..3ed864d463ee9c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -1112,73 +1112,74 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
-; GISEL-NEXT: v_trunc_f32_e32 v7, v5
-; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7
-; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v4
-; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s6, v6, 0
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[5:6]
-; GISEL-NEXT: v_mul_lo_u32 v5, v7, v4
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
-; GISEL-NEXT: v_mul_hi_u32 v9, v6, v4
-; GISEL-NEXT: v_mul_hi_u32 v4, v7, v4
-; GISEL-NEXT: v_mul_lo_u32 v10, v6, v8
-; GISEL-NEXT: v_mul_lo_u32 v11, v7, v8
-; GISEL-NEXT: v_mul_hi_u32 v12, v6, v8
+; GISEL-NEXT: v_trunc_f32_e32 v6, v5
+; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6
+; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v4
+; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v6
+; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v5, 0
+; GISEL-NEXT: v_mov_b32_e32 v4, v9
+; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[4:5]
+; GISEL-NEXT: v_mul_lo_u32 v4, v7, v8
+; GISEL-NEXT: v_mul_hi_u32 v6, v5, v8
+; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s7, v5, v[9:10]
; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
+; GISEL-NEXT: v_mul_lo_u32 v10, v5, v9
+; GISEL-NEXT: v_mul_lo_u32 v11, v7, v9
+; GISEL-NEXT: v_mul_hi_u32 v12, v5, v9
+; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v10, v4
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v6, v4
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v5, v4
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
-; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v7, v5, vcc
+; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v7, v6, vcc
; GISEL-NEXT: v_mov_b32_e32 v4, v9
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v5, v[4:5]
-; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v12, v[4:5]
+; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
-; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; GISEL-NEXT: v_xor_b32_e32 v10, v0, v4
-; GISEL-NEXT: v_mul_lo_u32 v0, v5, v8
-; GISEL-NEXT: v_mul_lo_u32 v12, v11, v9
-; GISEL-NEXT: v_xor_b32_e32 v13, v1, v4
+; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT: v_xor_b32_e32 v10, v0, v6
+; GISEL-NEXT: v_mul_lo_u32 v0, v12, v8
+; GISEL-NEXT: v_mul_lo_u32 v4, v11, v9
+; GISEL-NEXT: v_xor_b32_e32 v13, v1, v6
; GISEL-NEXT: v_mul_hi_u32 v1, v11, v8
-; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_mul_hi_u32 v8, v12, v8
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v1, v5, v9
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0
-; GISEL-NEXT: v_mul_hi_u32 v12, v11, v9
+; GISEL-NEXT: v_mul_lo_u32 v1, v12, v9
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
+; GISEL-NEXT: v_mul_hi_u32 v4, v11, v9
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT: v_mul_hi_u32 v9, v5, v9
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v8, v4
+; GISEL-NEXT: v_mul_hi_u32 v8, v12, v9
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc
+; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v12, v1, vcc
; GISEL-NEXT: v_mul_lo_u32 v8, v13, v0
; GISEL-NEXT: v_mul_lo_u32 v9, v10, v1
; GISEL-NEXT: v_mul_hi_u32 v11, v10, v0
; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT: v_mov_b32_e32 v5, 0x1000
+; GISEL-NEXT: v_mov_b32_e32 v4, 0x1000
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
@@ -1191,40 +1192,39 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v8
-; GISEL-NEXT: v_mul_hi_u32 v12, v13, v1
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT: v_mul_hi_u32 v11, v13, v1
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v5, v8, v[1:2]
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8
+; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v4, v8, v[1:2]
; GISEL-NEXT: v_sub_i32_e32 v10, vcc, v10, v0
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
-; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000
-; GISEL-NEXT: s_subb_u32 s7, 0, 0
; GISEL-NEXT: v_subb_u32_e64 v11, s[4:5], v13, v8, vcc
; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v13, v8
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4
; GISEL-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v10, v5
+; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v10, v4
+; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000
; GISEL-NEXT: v_cndmask_b32_e64 v12, -1, v1, s[4:5]
; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v0, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v13, v5
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v13, v4
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14
; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v8, vcc
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
-; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v13, v5
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
+; GISEL-NEXT: s_subb_u32 s7, 0, 0
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v13, v4
+; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v5, v[8:9]
; GISEL-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v14, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15
; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v1, vcc
; GISEL-NEXT: v_mul_lo_u32 v1, v7, v0
-; GISEL-NEXT: v_mul_lo_u32 v13, v6, v8
-; GISEL-NEXT: v_mul_hi_u32 v15, v6, v0
+; GISEL-NEXT: v_mul_lo_u32 v13, v5, v8
+; GISEL-NEXT: v_mul_hi_u32 v15, v5, v0
; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc
; GISEL-NEXT: v_mul_hi_u32 v0, v7, v0
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13
@@ -1233,7 +1233,7 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-NEXT: v_mul_lo_u32 v15, v7, v8
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT: v_mul_hi_u32 v13, v6, v8
+; GISEL-NEXT: v_mul_hi_u32 v13, v5, v8
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13
@@ -1244,93 +1244,91 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v6, v0
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v0
; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v7, v1, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12
-; GISEL-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT: v_xor_b32_e32 v1, v9, v4
+; GISEL-NEXT: v_cndmask_b32_e32 v7, v10, v9, vcc
+; GISEL-NEXT: v_xor_b32_e32 v10, v7, v6
+; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v13, v[1:2]
+; GISEL-NEXT: v_cndmask_b32_e32 v9, v11, v14, vcc
+; GISEL-NEXT: v_xor_b32_e32 v1, v9, v6
+; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s7, v5, v[7:8]
; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v14, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9
; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT: v_xor_b32_e32 v11, v2, v9
+; GISEL-NEXT: v_xor_b32_e32 v8, v2, v9
; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0
-; GISEL-NEXT: v_mul_lo_u32 v7, v8, v6
+; GISEL-NEXT: v_mul_lo_u32 v11, v5, v7
; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9
-; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0
+; GISEL-NEXT: v_mul_hi_u32 v3, v5, v0
; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v8, v6
+; GISEL-NEXT: v_mul_lo_u32 v3, v13, v7
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v11, v2
+; GISEL-NEXT: v_mul_hi_u32 v11, v5, v7
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v11
+; GISEL-NEXT: v_mul_hi_u32 v7, v13, v7
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v0
; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0
-; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0
-; GISEL-NEXT: v_xor_b32_e32 v8, v10, v4
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
+; GISEL-NEXT: v_mul_lo_u32 v5, v12, v3
+; GISEL-NEXT: v_mul_lo_u32 v7, v8, v2
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v6
+; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT: v_mul_hi_u32 v6, v8, v3
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v6, v12, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v12, v3
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT: v_mul_hi_u32 v7, v8, v2
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v7, v12, v2
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3
-; GISEL-NEXT: v_mul_hi_u32 v6, v11, v2
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v3
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5
; GISEL-NEXT: v_mul_hi_u32 v7, v12, v2
-; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v0
-; GISEL-NEXT: v_mov_b32_e32 v0, v3
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[0:1]
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4
-; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2
-; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
-; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3
-; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v2, v5
-; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v7, v5
+; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
+; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4]
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
+; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
+; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5
+; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v2, v4
+; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v7, v4
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
; GISEL-NEXT: v_cndmask_b32_e32 v8, -1, v8, vcc
-; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v7, v4
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
-; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
+; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v5, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5]
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9
; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9
@@ -1707,73 +1705,74 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
-; GISEL-NEXT: v_trunc_f32_e32 v7, v5
-; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7
-; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v4
-; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s6, v6, 0
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[5:6]
-; GISEL-NEXT: v_mul_lo_u32 v5, v7, v4
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
-; GISEL-NEXT: v_mul_hi_u32 v9, v6, v4
-; GISEL-NEXT: v_mul_hi_u32 v4, v7, v4
-; GISEL-NEXT: v_mul_lo_u32 v10, v6, v8
-; GISEL-NEXT: v_mul_lo_u32 v11, v7, v8
-; GISEL-NEXT: v_mul_hi_u32 v12, v6, v8
+; GISEL-NEXT: v_trunc_f32_e32 v6, v5
+; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6
+; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v4
+; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v6
+; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v5, 0
+; GISEL-NEXT: v_mov_b32_e32 v4, v9
+; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[4:5]
+; GISEL-NEXT: v_mul_lo_u32 v4, v7, v8
+; GISEL-NEXT: v_mul_hi_u32 v6, v5, v8
+; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s7, v5, v[9:10]
; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
+; GISEL-NEXT: v_mul_lo_u32 v10, v5, v9
+; GISEL-NEXT: v_mul_lo_u32 v11, v7, v9
+; GISEL-NEXT: v_mul_hi_u32 v12, v5, v9
+; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4
-; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v10, v4
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v8
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
-; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
-; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5
-; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v6, v4
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
+; GISEL-NEXT: v_add_i32_e32 v11, vcc, v5, v4
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
-; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v7, v5, vcc
+; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v7, v6, vcc
; GISEL-NEXT: v_mov_b32_e32 v4, v9
-; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v5, v[4:5]
-; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v12, v[4:5]
+; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
-; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; GISEL-NEXT: v_xor_b32_e32 v10, v0, v4
-; GISEL-NEXT: v_mul_lo_u32 v0, v5, v8
-; GISEL-NEXT: v_mul_lo_u32 v12, v11, v9
-; GISEL-NEXT: v_xor_b32_e32 v13, v1, v4
+; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT: v_xor_b32_e32 v10, v0, v6
+; GISEL-NEXT: v_mul_lo_u32 v0, v12, v8
+; GISEL-NEXT: v_mul_lo_u32 v4, v11, v9
+; GISEL-NEXT: v_xor_b32_e32 v13, v1, v6
; GISEL-NEXT: v_mul_hi_u32 v1, v11, v8
-; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; GISEL-NEXT: v_mul_hi_u32 v8, v12, v8
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v1, v5, v9
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0
-; GISEL-NEXT: v_mul_hi_u32 v12, v11, v9
+; GISEL-NEXT: v_mul_lo_u32 v1, v12, v9
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
+; GISEL-NEXT: v_mul_hi_u32 v4, v11, v9
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT: v_mul_hi_u32 v9, v5, v9
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v4
+; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v4, vcc, v8, v4
+; GISEL-NEXT: v_mul_hi_u32 v8, v12, v9
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
-; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc
+; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v12, v1, vcc
; GISEL-NEXT: v_mul_lo_u32 v8, v13, v0
; GISEL-NEXT: v_mul_lo_u32 v9, v10, v1
; GISEL-NEXT: v_mul_hi_u32 v11, v10, v0
; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT: v_mov_b32_e32 v5, 0x12d8fb
+; GISEL-NEXT: v_mov_b32_e32 v4, 0x12d8fb
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
@@ -1786,40 +1785,39 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
-; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v8
-; GISEL-NEXT: v_mul_hi_u32 v12, v13, v1
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT: v_mul_hi_u32 v11, v13, v1
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v5, v8, v[1:2]
+; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8
+; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v4, v8, v[1:2]
; GISEL-NEXT: v_sub_i32_e32 v10, vcc, v10, v0
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
-; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb
-; GISEL-NEXT: s_subb_u32 s7, 0, 0
; GISEL-NEXT: v_subb_u32_e64 v11, s[4:5], v13, v8, vcc
; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v13, v8
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4
; GISEL-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v10, v5
+; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v10, v4
+; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb
; GISEL-NEXT: v_cndmask_b32_e64 v12, -1, v1, s[4:5]
; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v0, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v13, v5
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v13, v4
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14
; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v8, vcc
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
-; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v13, v5
-; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
+; GISEL-NEXT: s_subb_u32 s7, 0, 0
+; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v13, v4
+; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v5, v[8:9]
; GISEL-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v14, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15
; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v1, vcc
; GISEL-NEXT: v_mul_lo_u32 v1, v7, v0
-; GISEL-NEXT: v_mul_lo_u32 v13, v6, v8
-; GISEL-NEXT: v_mul_hi_u32 v15, v6, v0
+; GISEL-NEXT: v_mul_lo_u32 v13, v5, v8
+; GISEL-NEXT: v_mul_hi_u32 v15, v5, v0
; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc
; GISEL-NEXT: v_mul_hi_u32 v0, v7, v0
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13
@@ -1828,7 +1826,7 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-NEXT: v_mul_lo_u32 v15, v7, v8
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
-; GISEL-NEXT: v_mul_hi_u32 v13, v6, v8
+; GISEL-NEXT: v_mul_hi_u32 v13, v5, v8
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13
@@ -1839,93 +1837,91 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
-; GISEL-NEXT: v_add_i32_e32 v8, vcc, v6, v0
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v0
; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v7, v1, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
+; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12
-; GISEL-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
-; GISEL-NEXT: v_xor_b32_e32 v1, v9, v4
+; GISEL-NEXT: v_cndmask_b32_e32 v7, v10, v9, vcc
+; GISEL-NEXT: v_xor_b32_e32 v10, v7, v6
+; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v13, v[1:2]
+; GISEL-NEXT: v_cndmask_b32_e32 v9, v11, v14, vcc
+; GISEL-NEXT: v_xor_b32_e32 v1, v9, v6
+; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s7, v5, v[7:8]
; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v14, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9
; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc
-; GISEL-NEXT: v_xor_b32_e32 v11, v2, v9
+; GISEL-NEXT: v_xor_b32_e32 v8, v2, v9
; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0
-; GISEL-NEXT: v_mul_lo_u32 v7, v8, v6
+; GISEL-NEXT: v_mul_lo_u32 v11, v5, v7
; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9
-; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0
+; GISEL-NEXT: v_mul_hi_u32 v3, v5, v0
; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v8, v6
+; GISEL-NEXT: v_mul_lo_u32 v3, v13, v7
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v11, v2
+; GISEL-NEXT: v_mul_hi_u32 v11, v5, v7
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v11
+; GISEL-NEXT: v_mul_hi_u32 v7, v13, v7
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0
+; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v0
; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc
-; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0
-; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2
-; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0
-; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0
-; GISEL-NEXT: v_xor_b32_e32 v8, v10, v4
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
+; GISEL-NEXT: v_mul_lo_u32 v5, v12, v3
+; GISEL-NEXT: v_mul_lo_u32 v7, v8, v2
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v6
+; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc
+; GISEL-NEXT: v_mul_hi_u32 v6, v8, v3
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
+; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT: v_mul_lo_u32 v6, v12, v2
+; GISEL-NEXT: v_mul_hi_u32 v3, v12, v3
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT: v_mul_hi_u32 v7, v8, v2
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
-; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
-; GISEL-NEXT: v_mul_lo_u32 v7, v12, v2
-; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3
-; GISEL-NEXT: v_mul_hi_u32 v6, v11, v2
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
-; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v3
+; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v5
; GISEL-NEXT: v_mul_hi_u32 v7, v12, v2
-; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
-; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v0
-; GISEL-NEXT: v_mov_b32_e32 v0, v3
-; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[0:1]
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4
-; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc
-; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2
-; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
-; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3
-; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v2, v5
-; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v7, v5
+; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
+; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4]
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
+; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
+; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5
+; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v2, v4
+; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
+; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v7, v4
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
+; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
; GISEL-NEXT: v_cndmask_b32_e32 v8, -1, v8, vcc
-; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v7, v5
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v7, v4
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
-; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
+; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v5, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5]
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9
; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9
diff --git a/llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp b/llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp
index dd6edd35a8468b..dada571564aefc 100644
--- a/llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp
@@ -1548,3 +1548,49 @@ TEST_F(AArch64GISelMITest, TestNumSignBitsUAddoOverflow) {
// Assert sign-extension from vector boolean
EXPECT_EQ(32u, Info.computeNumSignBits(CopyOverflow));
}
+
+TEST_F(AArch64GISelMITest, TestKnwonBitsUnmergeVectorScalar) { // Known bits through a vector -> scalar G_UNMERGE_VALUES.
+ StringRef MIRString = R"(
+ %copy_x0:_(<2 x s16>) = COPY $w0
+ %maskf:_(s16) = G_CONSTANT i16 15
+ %x0_x1:_(<2 x s16>) = G_BUILD_VECTOR %maskf, %maskf
+ %and:_(<2 x s16>) = G_AND %copy_x0, %x0_x1
+ %x0_0:_(s16), %x0_1:_(s16) = G_UNMERGE_VALUES %and
+ %result:_(s16) = COPY %x0_0
+)"; // MIR above: a <2 x s16> value AND-ed with a splat of 15, unmerged into two s16 values; the first one is copied to %result.
+ // NOTE(review): Knwon in the test name looks like a typo for Known; left as-is because renaming changes the test id.
+ setUp(MIRString);
+ if (!TM) // Skip rather than fail when TM is still unset after setUp.
+ GTEST_SKIP();
+ // Copies is filled in by the fixture; its last entry is presumably the %result COPY from the MIR above -- confirm.
+ Register CopyOverflow = Copies[Copies.size() - 1];
+
+ GISelKnownBits Info(*MF);
+ // 15 is 0x000F, so the 12 bits above the low nibble of the s16 result are expected to be known zero.
+ EXPECT_EQ(0xFFF0u, Info.getKnownBits(CopyOverflow).Zero.getZExtValue());
+}
+
+TEST_F(AArch64GISelMITest, TestKnwonBitsUnmergeVectorVector) { // Known bits through a vector -> vector G_UNMERGE_VALUES.
+ StringRef MIRString = R"(
+ %copy_x0:_(<4 x s8>) = COPY $w0
+ %maskff:_(s8) = G_CONSTANT i8 255
+ %maskf:_(s8) = G_CONSTANT i8 15
+ %x0_x1:_(<4 x s8>) = G_BUILD_VECTOR %maskf, %maskf, %maskff, %maskff
+ %and:_(<4 x s8>) = G_AND %copy_x0, %x0_x1
+ %x0_0:_(<2 x s8>), %x0_1:_(<2 x s8>) = G_UNMERGE_VALUES %and
+ %result1:_(<2 x s8>) = COPY %x0_0
+ %result2:_(<2 x s8>) = COPY %x0_1
+)"; // MIR above: a <4 x s8> value AND-ed with {15, 15, 255, 255}, unmerged into two <2 x s8> values that are each copied out.
+ // NOTE(review): Knwon in the test name looks like a typo for Known; left as-is because renaming changes the test id.
+ setUp(MIRString);
+ if (!TM) // Skip rather than fail when TM is still unset after setUp.
+ GTEST_SKIP();
+
+ // Copies is filled in by the fixture; its last two entries are presumably the %result1 and %result2 COPYs -- confirm.
+ GISelKnownBits Info(*MF);
+ // Second-to-last copy: expected to carry the lanes masked with 15 (0x0F), so the high nibble is known zero.
+ Register CopyOverflow1 = Copies[Copies.size() - 2];
+ EXPECT_EQ(0xF0u, Info.getKnownBits(CopyOverflow1).Zero.getZExtValue());
+ Register CopyOverflow2 = Copies[Copies.size() - 1]; // Last copy: expected to carry the lanes masked with 255 (0xFF).
+ EXPECT_EQ(0x00u, Info.getKnownBits(CopyOverflow2).Zero.getZExtValue()); // A 0xFF mask clears nothing, so no bit is known zero.
+}
More information about the llvm-commits
mailing list