[llvm] [TargetLowering][AMDGPU][ARM][RISCV][X86] Teach SimplifyDemandedBits to combine (srl (sra X, C1), ShAmt) -> sra(X, C1+ShAmt) (PR #101751)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 12 10:08:51 PDT 2024
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/101751
>From 8030bdafee64832fe71cf1e47c03de22a9a824d0 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Fri, 2 Aug 2024 13:17:34 -0700
Subject: [PATCH 1/4] [TargetLowering][AMDGPU][ARM][RISCV][X86] Teach
SimplifyDemandedBits to combine (shr (sra X, C1), ShAmt) -> sra(X, C1+ShAmt)
If the upper bits of the shr aren't demanded.
This helps with cases where the outer srl was originally
an sra and was converted to a srl by SimplifyDemandedBits before
it had a chance to combine with the inner sra. This can occur
when the inner sra was part of a sign_extend_inreg expansion.
There are some regressions in AMDGPU, ARM, and Thumb that need
investigating.
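
(Illustration only, not part of the patch: a small standalone C++ sketch of the
bit-level identity the fold relies on. The brute-force check below is
hypothetical; it assumes `>>` on a signed 32-bit integer is an arithmetic
shift, which holds on mainstream targets. It verifies that when the high ShAmt
bits of the result are never demanded, (srl (sra X, C1), ShAmt) agrees with
sra(X, min(C1 + ShAmt, BitWidth - 1)) on every demanded bit.)

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const unsigned BitWidth = 32;
  const int32_t Values[] = {-123456789, 0x12345678};
  for (int32_t X : Values) {
    for (unsigned C1 = 0; C1 < BitWidth; ++C1) {
      for (unsigned ShAmt = 1; ShAmt < BitWidth; ++ShAmt) {
        // srl (sra X, C1), ShAmt: arithmetic shift, then logical shift.
        uint32_t TwoShifts = (uint32_t)(X >> C1) >> ShAmt;
        // sra X, C1+ShAmt, clamped to BitWidth-1 as in the patch.
        unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
        uint32_t OneShift = (uint32_t)(X >> Combined);
        // Only the low BitWidth-ShAmt bits are demanded; ignore the rest.
        uint32_t DemandedMask = UINT32_MAX >> ShAmt;
        assert((TwoShifts & DemandedMask) == (OneShift & DemandedMask));
      }
    }
  }
  return 0;
}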
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 16 ++
.../AArch64/srem-seteq-illegal-types.ll | 4 +-
.../CodeGen/AMDGPU/div-rem-by-constant-64.ll | 40 ++--
llvm/test/CodeGen/AMDGPU/div_i128.ll | 68 +++---
llvm/test/CodeGen/AMDGPU/function-returns.ll | 12 +-
llvm/test/CodeGen/AMDGPU/itofp.i128.ll | 8 +-
.../AMDGPU/load-range-metadata-sign-bits.ll | 5 +-
llvm/test/CodeGen/AMDGPU/permute_i8.ll | 24 +--
llvm/test/CodeGen/AMDGPU/sdiv64.ll | 14 +-
llvm/test/CodeGen/AMDGPU/shift-i128.ll | 4 +-
.../AMDGPU/srem-seteq-illegal-types.ll | 4 +-
.../CodeGen/ARM/srem-seteq-illegal-types.ll | 26 +--
.../CodeGen/Mips/srem-seteq-illegal-types.ll | 6 +-
llvm/test/CodeGen/NVPTX/idioms.ll | 4 +-
.../PowerPC/srem-seteq-illegal-types.ll | 8 +-
llvm/test/CodeGen/RISCV/div.ll | 40 ++--
llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll | 20 +-
llvm/test/CodeGen/RISCV/rv64zba.ll | 18 +-
.../CodeGen/RISCV/srem-seteq-illegal-types.ll | 30 ++-
.../Thumb2/srem-seteq-illegal-types.ll | 5 +-
llvm/test/CodeGen/X86/scmp.ll | 181 ++++++++--------
llvm/test/CodeGen/X86/sdiv_fix_sat.ll | 196 ++++++++++--------
.../CodeGen/X86/srem-seteq-illegal-types.ll | 6 +-
23 files changed, 371 insertions(+), 368 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8ab3103fda23f9..ea1102a14cc593 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1958,6 +1958,22 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
+      // If this is (shr (sra X, C1), ShAmt), see if we can combine this into a
+      // single sra. We can do this if the top bits are never demanded.
+      if (Op0.getOpcode() == ISD::SRA) {
+        if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
+          if (std::optional<uint64_t> InnerSA =
+                  TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
+            unsigned C1 = *InnerSA;
+            // Clamp the combined shift amount if it exceeds the bit width.
+            unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
+            SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
+            return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
+                                                     Op0.getOperand(0), NewSA));
+          }
+        }
+      }
+
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
index 595991e86a91c7..9fbce05eee1775 100644
--- a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
@@ -41,8 +41,8 @@ define i1 @test_srem_even(i4 %X) nounwind {
define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; CHECK-LABEL: test_srem_pow2_setne:
; CHECK: // %bb.0:
-; CHECK-NEXT: sbfx w8, w0, #0, #6
-; CHECK-NEXT: ubfx w8, w8, #9, #2
+; CHECK-NEXT: sbfx w8, w0, #5, #1
+; CHECK-NEXT: and w8, w8, #0x3
; CHECK-NEXT: add w8, w0, w8
; CHECK-NEXT: and w8, w8, #0x3c
; CHECK-NEXT: sub w8, w0, w8
diff --git a/llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll b/llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll
index 113c6d01c99a16..dd888433e32e8d 100644
--- a/llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll
+++ b/llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll
@@ -661,9 +661,10 @@ define noundef i64 @sdiv64_2(i64 noundef %i) {
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 31, v1
-; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; GFX9-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
+; GFX9-NEXT: v_alignbit_b32 v0, v3, v2, 1
+; GFX9-NEXT: v_ashrrev_i64 v[1:2], 33, v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: sdiv64_2:
@@ -671,17 +672,20 @@ define noundef i64 @sdiv64_2(i64 noundef %i) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
-; GFX942-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
+; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, v[2:3]
+; GFX942-NEXT: v_alignbit_b32 v0, v3, v2, 1
+; GFX942-NEXT: v_ashrrev_i64 v[2:3], 33, v[2:3]
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: sdiv64_2:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 31, v1
-; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX1030-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
-; GFX1030-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
+; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
+; GFX1030-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo
+; GFX1030-NEXT: v_alignbit_b32 v0, v3, v2, 1
+; GFX1030-NEXT: v_ashrrev_i64 v[1:2], 33, v[2:3]
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = sdiv i64 %i, 2
@@ -788,9 +792,10 @@ define noundef i64 @sdiv64_64(i64 noundef %i) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 26, v2
-; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
-; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; GFX9-NEXT: v_ashrrev_i64 v[0:1], 6, v[0:1]
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
+; GFX9-NEXT: v_alignbit_b32 v0, v3, v2, 6
+; GFX9-NEXT: v_ashrrev_i64 v[1:2], 38, v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: sdiv64_64:
@@ -799,8 +804,10 @@ define noundef i64 @sdiv64_64(i64 noundef %i) {
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 26, v2
; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
-; GFX942-NEXT: v_ashrrev_i64 v[0:1], 6, v[0:1]
+; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, v[2:3]
+; GFX942-NEXT: v_alignbit_b32 v0, v3, v2, 6
+; GFX942-NEXT: v_ashrrev_i64 v[2:3], 38, v[2:3]
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: sdiv64_64:
@@ -808,9 +815,10 @@ define noundef i64 @sdiv64_64(i64 noundef %i) {
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 26, v2
-; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
-; GFX1030-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
-; GFX1030-NEXT: v_ashrrev_i64 v[0:1], 6, v[0:1]
+; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
+; GFX1030-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo
+; GFX1030-NEXT: v_alignbit_b32 v0, v3, v2, 6
+; GFX1030-NEXT: v_ashrrev_i64 v[1:2], 38, v[2:3]
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = sdiv i64 %i, 64
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index fea1303d0a2b76..071aae98c96853 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -4448,13 +4448,14 @@ define i128 @v_sdiv_i128_v_pow2k(i128 %lhs) {
; GFX9-NEXT: v_mov_b32_e32 v5, v4
; GFX9-NEXT: v_lshrrev_b64 v[4:5], 31, v[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v4
-; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v1, v5, vcc
-; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
-; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
-; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[2:3]
-; GFX9-NEXT: v_lshrrev_b32_e32 v4, 1, v4
-; GFX9-NEXT: v_ashrrev_i64 v[2:3], 33, v[2:3]
-; GFX9-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, v1, v5, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v2, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v3, vcc
+; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[4:5]
+; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v6
+; GFX9-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX9-NEXT: v_ashrrev_i64 v[2:3], 33, v[4:5]
+; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-O0-LABEL: v_sdiv_i128_v_pow2k:
@@ -4481,41 +4482,40 @@ define i128 @v_sdiv_i128_v_pow2k(i128 %lhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
; GFX9-O0-NEXT: s_mov_b32 s4, 63
; GFX9-O0-NEXT: v_ashrrev_i64 v[4:5], s4, v[4:5]
-; GFX9-O0-NEXT: s_mov_b32 s5, 31
-; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], s5, v[4:5]
+; GFX9-O0-NEXT: s_mov_b32 s6, 31
+; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], s6, v[4:5]
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v7
; GFX9-O0-NEXT: s_mov_b64 s[8:9], 0
-; GFX9-O0-NEXT: s_mov_b32 s6, s8
-; GFX9-O0-NEXT: s_mov_b32 s4, s9
+; GFX9-O0-NEXT: s_mov_b32 s7, s8
+; GFX9-O0-NEXT: s_mov_b32 s5, s9
; GFX9-O0-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v4, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s6
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v5, vcc, v2, v4, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s4
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v5, vcc, v3, v4, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, s7
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v3, vcc, v2, v3, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5
; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
-; GFX9-O0-NEXT: s_mov_b32 s4, 33
-; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
+; GFX9-O0-NEXT: s_mov_b32 s5, 33
+; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s5, v[0:1]
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
-; GFX9-O0-NEXT: v_lshl_or_b32 v0, v2, s5, v0
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v6
+; GFX9-O0-NEXT: v_lshl_or_b32 v0, v2, s6, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-O0-NEXT: v_ashrrev_i64 v[5:6], s5, v[5:6]
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4
+; GFX9-O0-NEXT: s_mov_b32 s5, 1
+; GFX9-O0-NEXT: v_alignbit_b32 v1, v1, v2, s5
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
; GFX9-O0-NEXT: v_ashrrev_i64 v[3:4], s4, v[3:4]
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6
-; GFX9-O0-NEXT: s_mov_b32 s4, 1
-; GFX9-O0-NEXT: v_alignbit_b32 v1, v1, v2, s4
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT: s_mov_b32 s4, 32
-; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s4, v[3:4]
; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
; GFX9-O0-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/function-returns.ll b/llvm/test/CodeGen/AMDGPU/function-returns.ll
index 401cbce00ac9a8..925987df8c8dea 100644
--- a/llvm/test/CodeGen/AMDGPU/function-returns.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-returns.ll
@@ -334,23 +334,23 @@ define signext i63 @i63_signext_func_void(i63 %val) #0 {
; CI-LABEL: i63_signext_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
-; CI-NEXT: v_ashr_i64 v[0:1], v[0:1], 1
+; CI-NEXT: v_lshl_b64 v[1:2], v[0:1], 1
+; CI-NEXT: v_ashr_i64 v[1:2], v[1:2], 33
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: i63_signext_func_void:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
-; GFX89-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
+; GFX89-NEXT: v_lshlrev_b64 v[1:2], 1, v[0:1]
+; GFX89-NEXT: v_ashrrev_i64 v[1:2], 33, v[1:2]
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i63_signext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
+; GFX11-NEXT: v_lshlrev_b64 v[1:2], 1, v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
+; GFX11-NEXT: v_ashrrev_i64 v[1:2], 33, v[1:2]
; GFX11-NEXT: s_setpc_b64 s[30:31]
ret i63 %val
}
diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
index c5198cdb421a50..1fe8c0aabb6b6d 100644
--- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
@@ -631,17 +631,15 @@ define double @sitofp_i128_to_f64(i128 %x) {
; SDAG-NEXT: v_and_or_b32 v0, v0, 1, v4
; SDAG-NEXT: v_add_co_u32_e32 v4, vcc, 1, v0
; SDAG-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
-; SDAG-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
; SDAG-NEXT: v_lshrrev_b64 v[0:1], 2, v[4:5]
-; SDAG-NEXT: v_lshlrev_b32_e32 v7, 30, v6
-; SDAG-NEXT: v_or_b32_e32 v10, v1, v7
+; SDAG-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
; SDAG-NEXT: v_and_b32_e32 v1, 0x800000, v5
; SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; SDAG-NEXT: v_alignbit_b32 v10, v6, v5, 2
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: ; %bb.11: ; %itofp-if-then20
; SDAG-NEXT: v_lshrrev_b64 v[0:1], 3, v[4:5]
-; SDAG-NEXT: v_lshlrev_b32_e32 v2, 29, v6
-; SDAG-NEXT: v_or_b32_e32 v10, v1, v2
+; SDAG-NEXT: v_alignbit_b32 v10, v6, v5, 3
; SDAG-NEXT: v_mov_b32_e32 v2, v8
; SDAG-NEXT: ; %bb.12: ; %Flow
; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll b/llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll
index 5fc1a87e71a1a6..49b569bf2154e6 100644
--- a/llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll
@@ -110,10 +110,11 @@ define i64 @range_metadata_sext_i8_signed_range_i64(ptr addrspace(1) %ptr) {
; SDAG-LABEL: range_metadata_sext_i8_signed_range_i64:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT: global_load_dwordx2 v[0:1], v[0:1], off glc
+; SDAG-NEXT: global_load_dwordx2 v[2:3], v[0:1], off glc
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: v_lshlrev_b32_e32 v1, 23, v0
+; SDAG-NEXT: v_lshlrev_b32_e32 v1, 23, v2
; SDAG-NEXT: v_ashrrev_i64 v[0:1], 55, v[0:1]
+; SDAG-NEXT: v_bfe_i32 v1, v2, 8, 1
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: range_metadata_sext_i8_signed_range_i64:
diff --git a/llvm/test/CodeGen/AMDGPU/permute_i8.ll b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
index bf98af33dc7b08..050300a69c46bb 100644
--- a/llvm/test/CodeGen/AMDGPU/permute_i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
@@ -1049,15 +1049,14 @@ define hidden void @ashr_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1,
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: v_bfe_i32 v1, v9, 0, 8
-; GFX10-NEXT: v_ashrrev_i32_e32 v3, 24, v9
; GFX10-NEXT: v_ashrrev_i32_sdwa v2, v2, v9 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX10-NEXT: v_ashrrev_i32_e32 v3, 25, v9
; GFX10-NEXT: v_lshlrev_b16 v1, 7, v1
-; GFX10-NEXT: v_lshrrev_b16 v3, 1, v3
+; GFX10-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_ashrrev_i16 v4, 10, v0
; GFX10-NEXT: v_perm_b32 v0, v9, v0, 0x4010707
; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff00, v1
-; GFX10-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX10-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX10-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX10-NEXT: global_store_dword v[5:6], v1, off
@@ -1075,23 +1074,22 @@ define hidden void @ashr_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1,
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX9-NEXT: global_load_dword v4, v[0:1], off
; GFX9-NEXT: global_load_dword v9, v[2:3], off
-; GFX9-NEXT: v_mov_b32_e32 v0, 26
-; GFX9-NEXT: v_mov_b32_e32 v1, 1
-; GFX9-NEXT: v_mov_b32_e32 v2, 7
+; GFX9-NEXT: v_mov_b32_e32 v1, 7
; GFX9-NEXT: s_mov_b32 s4, 0x4010707
+; GFX9-NEXT: v_mov_b32_e32 v0, 26
; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: v_ashrrev_i32_sdwa v0, v0, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v1, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
-; GFX9-NEXT: v_lshlrev_b16_sdwa v2, v2, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX9-NEXT: v_lshlrev_b16_sdwa v1, v1, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_perm_b32 v3, v4, v9, s4
+; GFX9-NEXT: v_perm_b32 v2, v4, v9, s4
+; GFX9-NEXT: v_ashrrev_i32_sdwa v0, v0, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-NEXT: v_ashrrev_i32_e32 v3, 25, v4
; GFX9-NEXT: v_ashrrev_i16_e32 v9, 10, v9
-; GFX9-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff00, v2
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff00, v1
+; GFX9-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX9-NEXT: v_or_b32_sdwa v1, v9, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX9-NEXT: global_store_dword v[5:6], v0, off
-; GFX9-NEXT: global_store_dword v[7:8], v3, off
+; GFX9-NEXT: global_store_dword v[7:8], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
%tid = call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
index f4776747f16ac1..8f9417f875e8e7 100644
--- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
@@ -1759,9 +1759,10 @@ define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) {
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GCN-NEXT: v_lshrrev_b32_e32 v2, 17, v2
-; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2
-; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; GCN-NEXT: v_ashr_i64 v[0:1], v[0:1], 15
+; GCN-NEXT: v_add_i32_e32 v2, vcc, v0, v2
+; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc
+; GCN-NEXT: v_alignbit_b32 v0, v3, v2, 15
+; GCN-NEXT: v_ashr_i64 v[1:2], v[2:3], 47
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GCN-IR-LABEL: v_test_sdiv_pow2_k_den_i64:
@@ -2064,9 +2065,10 @@ define i64 @v_test_sdiv24_pow2_k_den_i64(i64 %x) {
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_ashr_i64 v[0:1], v[0:1], 40
; GCN-NEXT: v_lshrrev_b32_e32 v2, 17, v1
-; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2
-; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; GCN-NEXT: v_ashr_i64 v[0:1], v[0:1], 15
+; GCN-NEXT: v_add_i32_e32 v2, vcc, v0, v2
+; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc
+; GCN-NEXT: v_alignbit_b32 v0, v3, v2, 15
+; GCN-NEXT: v_ashr_i64 v[1:2], v[2:3], 47
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GCN-IR-LABEL: v_test_sdiv24_pow2_k_den_i64:
diff --git a/llvm/test/CodeGen/AMDGPU/shift-i128.ll b/llvm/test/CodeGen/AMDGPU/shift-i128.ll
index ebc916b5c889b5..d603b413414021 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-i128.ll
@@ -113,8 +113,10 @@ define i128 @v_ashr_i128_vk(i128 %lhs) {
; GCN-NEXT: v_mov_b32_e32 v4, v1
; GCN-NEXT: v_lshl_b64 v[0:1], v[2:3], 31
; GCN-NEXT: v_lshrrev_b32_e32 v4, 1, v4
-; GCN-NEXT: v_ashr_i64 v[2:3], v[2:3], 33
; GCN-NEXT: v_or_b32_e32 v0, v4, v0
+; GCN-NEXT: v_ashr_i64 v[4:5], v[2:3], 33
+; GCN-NEXT: v_ashrrev_i32_e32 v3, 31, v3
+; GCN-NEXT: v_mov_b32_e32 v2, v4
; GCN-NEXT: s_setpc_b64 s[30:31]
%shl = ashr i128 %lhs, 33
ret i128 %shl
diff --git a/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll
index 126b17e718b59f..2efe27df2d10d1 100644
--- a/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll
@@ -43,8 +43,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; CHECK-LABEL: test_srem_pow2_setne:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_bfe_i32 v1, v0, 0, 6
-; CHECK-NEXT: v_bfe_u32 v1, v1, 9, 2
+; CHECK-NEXT: v_bfe_i32 v1, v0, 5, 1
+; CHECK-NEXT: v_and_b32_e32 v1, 3, v1
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v0, v1
; CHECK-NEXT: v_and_b32_e32 v1, 60, v1
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
diff --git a/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll
index a4e081d5384e5e..7f56215b9b4123 100644
--- a/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll
@@ -209,8 +209,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; ARM5: @ %bb.0:
; ARM5-NEXT: lsl r1, r0, #26
; ARM5-NEXT: mov r2, #3
-; ARM5-NEXT: asr r1, r1, #26
-; ARM5-NEXT: and r1, r2, r1, lsr #9
+; ARM5-NEXT: and r1, r2, r1, asr #31
; ARM5-NEXT: add r1, r0, r1
; ARM5-NEXT: and r1, r1, #60
; ARM5-NEXT: sub r0, r0, r1
@@ -222,8 +221,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; ARM6: @ %bb.0:
; ARM6-NEXT: lsl r1, r0, #26
; ARM6-NEXT: mov r2, #3
-; ARM6-NEXT: asr r1, r1, #26
-; ARM6-NEXT: and r1, r2, r1, lsr #9
+; ARM6-NEXT: and r1, r2, r1, asr #31
; ARM6-NEXT: add r1, r0, r1
; ARM6-NEXT: and r1, r1, #60
; ARM6-NEXT: sub r0, r0, r1
@@ -233,8 +231,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
;
; ARM7-LABEL: test_srem_pow2_setne:
; ARM7: @ %bb.0:
-; ARM7-NEXT: sbfx r1, r0, #0, #6
-; ARM7-NEXT: ubfx r1, r1, #9, #2
+; ARM7-NEXT: lsl r1, r0, #26
+; ARM7-NEXT: mov r2, #3
+; ARM7-NEXT: and r1, r2, r1, asr #31
; ARM7-NEXT: add r1, r0, r1
; ARM7-NEXT: and r1, r1, #60
; ARM7-NEXT: sub r0, r0, r1
@@ -244,8 +243,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
;
; ARM8-LABEL: test_srem_pow2_setne:
; ARM8: @ %bb.0:
-; ARM8-NEXT: sbfx r1, r0, #0, #6
-; ARM8-NEXT: ubfx r1, r1, #9, #2
+; ARM8-NEXT: lsl r1, r0, #26
+; ARM8-NEXT: mov r2, #3
+; ARM8-NEXT: and r1, r2, r1, asr #31
; ARM8-NEXT: add r1, r0, r1
; ARM8-NEXT: and r1, r1, #60
; ARM8-NEXT: sub r0, r0, r1
@@ -255,8 +255,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
;
; NEON7-LABEL: test_srem_pow2_setne:
; NEON7: @ %bb.0:
-; NEON7-NEXT: sbfx r1, r0, #0, #6
-; NEON7-NEXT: ubfx r1, r1, #9, #2
+; NEON7-NEXT: lsl r1, r0, #26
+; NEON7-NEXT: mov r2, #3
+; NEON7-NEXT: and r1, r2, r1, asr #31
; NEON7-NEXT: add r1, r0, r1
; NEON7-NEXT: and r1, r1, #60
; NEON7-NEXT: sub r0, r0, r1
@@ -266,8 +267,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
;
; NEON8-LABEL: test_srem_pow2_setne:
; NEON8: @ %bb.0:
-; NEON8-NEXT: sbfx r1, r0, #0, #6
-; NEON8-NEXT: ubfx r1, r1, #9, #2
+; NEON8-NEXT: lsl r1, r0, #26
+; NEON8-NEXT: mov r2, #3
+; NEON8-NEXT: and r1, r2, r1, asr #31
; NEON8-NEXT: add r1, r0, r1
; NEON8-NEXT: and r1, r1, #60
; NEON8-NEXT: sub r0, r0, r1
diff --git a/llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll
index 1a9fa27c263deb..37cca8687890a6 100644
--- a/llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll
@@ -90,8 +90,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; MIPSEL-LABEL: test_srem_pow2_setne:
; MIPSEL: # %bb.0:
; MIPSEL-NEXT: sll $1, $4, 26
-; MIPSEL-NEXT: sra $1, $1, 26
-; MIPSEL-NEXT: srl $1, $1, 9
+; MIPSEL-NEXT: sra $1, $1, 31
; MIPSEL-NEXT: andi $1, $1, 3
; MIPSEL-NEXT: addu $1, $4, $1
; MIPSEL-NEXT: andi $1, $1, 60
@@ -104,8 +103,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; MIPS64EL: # %bb.0:
; MIPS64EL-NEXT: sll $1, $4, 0
; MIPS64EL-NEXT: sll $2, $1, 26
-; MIPS64EL-NEXT: sra $2, $2, 26
-; MIPS64EL-NEXT: srl $2, $2, 9
+; MIPS64EL-NEXT: sra $2, $2, 31
; MIPS64EL-NEXT: andi $2, $2, 3
; MIPS64EL-NEXT: addu $2, $1, $2
; MIPS64EL-NEXT: andi $2, $2, 60
diff --git a/llvm/test/CodeGen/NVPTX/idioms.ll b/llvm/test/CodeGen/NVPTX/idioms.ll
index e8fe47c303f92d..7123b49430e2b7 100644
--- a/llvm/test/CodeGen/NVPTX/idioms.ll
+++ b/llvm/test/CodeGen/NVPTX/idioms.ll
@@ -113,9 +113,9 @@ define %struct.S16 @i32_to_2xi16_shr(i32 noundef %i){
%h32 = ashr i32 %i1, 16
%h = trunc i32 %h32 to i16
; CHECK: ld.param.u32 %[[R32:r[0-9]+]], [i32_to_2xi16_shr_param_0];
-; CHECK: shr.s32 %[[R32H:r[0-9]+]], %[[R32]], 16;
+; CHECK: shr.s32 %[[R32H:r[0-9]+]], %[[R32]], 31;
+; CHECK: cvt.u16.u32 %rs{{[0-9+]}}, %[[R32H]]
; CHECK-DAG mov.b32 {tmp, %rs{{[0-9+]}}}, %[[R32]];
-; CHECK-DAG mov.b32 {tmp, %rs{{[0-9+]}}}, %[[R32H]];
%s0 = insertvalue %struct.S16 poison, i16 %l, 0
%s1 = insertvalue %struct.S16 %s0, i16 %h, 1
ret %struct.S16 %s1
diff --git a/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
index b0cc89d1828eda..2b07f27be021b1 100644
--- a/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
@@ -85,8 +85,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; PPC-LABEL: test_srem_pow2_setne:
; PPC: # %bb.0:
; PPC-NEXT: slwi 4, 3, 26
-; PPC-NEXT: srawi 4, 4, 26
-; PPC-NEXT: rlwinm 4, 4, 23, 30, 31
+; PPC-NEXT: srawi 4, 4, 31
+; PPC-NEXT: clrlwi 4, 4, 30
; PPC-NEXT: add 4, 3, 4
; PPC-NEXT: rlwinm 4, 4, 0, 26, 29
; PPC-NEXT: sub 3, 3, 4
@@ -99,8 +99,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; PPC64LE-LABEL: test_srem_pow2_setne:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: slwi 4, 3, 26
-; PPC64LE-NEXT: srawi 4, 4, 26
-; PPC64LE-NEXT: rlwinm 4, 4, 23, 30, 31
+; PPC64LE-NEXT: srawi 4, 4, 31
+; PPC64LE-NEXT: clrlwi 4, 4, 30
; PPC64LE-NEXT: add 4, 3, 4
; PPC64LE-NEXT: rlwinm 4, 4, 0, 26, 29
; PPC64LE-NEXT: sub 3, 3, 4
diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll
index 99c83b99497dd3..f1ad170ac674cc 100644
--- a/llvm/test/CodeGen/RISCV/div.ll
+++ b/llvm/test/CodeGen/RISCV/div.ll
@@ -1017,9 +1017,8 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
; RV32I-LABEL: sdiv8_pow2:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a0, 24
-; RV32I-NEXT: srai a1, a1, 24
-; RV32I-NEXT: slli a1, a1, 17
-; RV32I-NEXT: srli a1, a1, 29
+; RV32I-NEXT: srai a1, a1, 31
+; RV32I-NEXT: andi a1, a1, 7
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: srai a0, a0, 27
@@ -1028,9 +1027,8 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
; RV32IM-LABEL: sdiv8_pow2:
; RV32IM: # %bb.0:
; RV32IM-NEXT: slli a1, a0, 24
-; RV32IM-NEXT: srai a1, a1, 24
-; RV32IM-NEXT: slli a1, a1, 17
-; RV32IM-NEXT: srli a1, a1, 29
+; RV32IM-NEXT: srai a1, a1, 31
+; RV32IM-NEXT: andi a1, a1, 7
; RV32IM-NEXT: add a0, a0, a1
; RV32IM-NEXT: slli a0, a0, 24
; RV32IM-NEXT: srai a0, a0, 27
@@ -1039,9 +1037,8 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
; RV64I-LABEL: sdiv8_pow2:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 56
-; RV64I-NEXT: srai a1, a1, 56
-; RV64I-NEXT: slli a1, a1, 49
-; RV64I-NEXT: srli a1, a1, 61
+; RV64I-NEXT: srai a1, a1, 63
+; RV64I-NEXT: andi a1, a1, 7
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a0, a0, 59
@@ -1050,9 +1047,8 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
; RV64IM-LABEL: sdiv8_pow2:
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 56
-; RV64IM-NEXT: srai a1, a1, 56
-; RV64IM-NEXT: slli a1, a1, 49
-; RV64IM-NEXT: srli a1, a1, 61
+; RV64IM-NEXT: srai a1, a1, 63
+; RV64IM-NEXT: andi a1, a1, 7
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: slli a0, a0, 56
; RV64IM-NEXT: srai a0, a0, 59
@@ -1209,9 +1205,8 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
; RV32I-LABEL: sdiv16_pow2:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a0, 16
-; RV32I-NEXT: srai a1, a1, 16
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: srli a1, a1, 29
+; RV32I-NEXT: srai a1, a1, 31
+; RV32I-NEXT: andi a1, a1, 7
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srai a0, a0, 19
@@ -1220,9 +1215,8 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
; RV32IM-LABEL: sdiv16_pow2:
; RV32IM: # %bb.0:
; RV32IM-NEXT: slli a1, a0, 16
-; RV32IM-NEXT: srai a1, a1, 16
-; RV32IM-NEXT: slli a1, a1, 1
-; RV32IM-NEXT: srli a1, a1, 29
+; RV32IM-NEXT: srai a1, a1, 31
+; RV32IM-NEXT: andi a1, a1, 7
; RV32IM-NEXT: add a0, a0, a1
; RV32IM-NEXT: slli a0, a0, 16
; RV32IM-NEXT: srai a0, a0, 19
@@ -1231,9 +1225,8 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
; RV64I-LABEL: sdiv16_pow2:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 48
-; RV64I-NEXT: srai a1, a1, 48
-; RV64I-NEXT: slli a1, a1, 33
-; RV64I-NEXT: srli a1, a1, 61
+; RV64I-NEXT: srai a1, a1, 63
+; RV64I-NEXT: andi a1, a1, 7
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a0, a0, 51
@@ -1242,9 +1235,8 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
; RV64IM-LABEL: sdiv16_pow2:
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 48
-; RV64IM-NEXT: srai a1, a1, 48
-; RV64IM-NEXT: slli a1, a1, 33
-; RV64IM-NEXT: srli a1, a1, 61
+; RV64IM-NEXT: srai a1, a1, 63
+; RV64IM-NEXT: andi a1, a1, 7
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: slli a0, a0, 48
; RV64IM-NEXT: srai a0, a0, 51
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll
index 17d9e9cefe117e..435d883847d37a 100644
--- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll
@@ -545,9 +545,8 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
; RV64I-LABEL: sdiv8_pow2:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 24
-; RV64I-NEXT: sraiw a1, a1, 24
-; RV64I-NEXT: slli a1, a1, 49
-; RV64I-NEXT: srli a1, a1, 61
+; RV64I-NEXT: sraiw a1, a1, 31
+; RV64I-NEXT: andi a1, a1, 7
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: sraiw a0, a0, 27
@@ -556,9 +555,8 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
; RV64IM-LABEL: sdiv8_pow2:
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 24
-; RV64IM-NEXT: sraiw a1, a1, 24
-; RV64IM-NEXT: slli a1, a1, 49
-; RV64IM-NEXT: srli a1, a1, 61
+; RV64IM-NEXT: sraiw a1, a1, 31
+; RV64IM-NEXT: andi a1, a1, 7
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: slli a0, a0, 24
; RV64IM-NEXT: sraiw a0, a0, 27
@@ -649,9 +647,8 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
; RV64I-LABEL: sdiv16_pow2:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 16
-; RV64I-NEXT: sraiw a1, a1, 16
-; RV64I-NEXT: slli a1, a1, 33
-; RV64I-NEXT: srli a1, a1, 61
+; RV64I-NEXT: sraiw a1, a1, 31
+; RV64I-NEXT: andi a1, a1, 7
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: slli a0, a0, 16
; RV64I-NEXT: sraiw a0, a0, 19
@@ -660,9 +657,8 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
; RV64IM-LABEL: sdiv16_pow2:
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 16
-; RV64IM-NEXT: sraiw a1, a1, 16
-; RV64IM-NEXT: slli a1, a1, 33
-; RV64IM-NEXT: srli a1, a1, 61
+; RV64IM-NEXT: sraiw a1, a1, 31
+; RV64IM-NEXT: andi a1, a1, 7
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: slli a0, a0, 16
; RV64IM-NEXT: sraiw a0, a0, 19
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 20a04844640188..f1b2193641d7a2 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -1555,17 +1555,16 @@ define zeroext i32 @sext_ashr_zext_i8(i8 %a) nounwind {
; RV64I-LABEL: sext_ashr_zext_i8:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai a0, a0, 56
-; RV64I-NEXT: slli a0, a0, 23
+; RV64I-NEXT: srai a0, a0, 63
+; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBANOZBB-LABEL: sext_ashr_zext_i8:
; RV64ZBANOZBB: # %bb.0:
; RV64ZBANOZBB-NEXT: slli a0, a0, 56
-; RV64ZBANOZBB-NEXT: srai a0, a0, 56
-; RV64ZBANOZBB-NEXT: slli a0, a0, 23
-; RV64ZBANOZBB-NEXT: srli a0, a0, 32
+; RV64ZBANOZBB-NEXT: srai a0, a0, 63
+; RV64ZBANOZBB-NEXT: zext.w a0, a0
; RV64ZBANOZBB-NEXT: ret
;
; RV64ZBAZBB-LABEL: sext_ashr_zext_i8:
@@ -1674,17 +1673,16 @@ define zeroext i32 @sext_ashr_zext_i16(i16 %a) nounwind {
; RV64I-LABEL: sext_ashr_zext_i16:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai a0, a0, 48
-; RV64I-NEXT: slli a0, a0, 23
+; RV64I-NEXT: srai a0, a0, 57
+; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBANOZBB-LABEL: sext_ashr_zext_i16:
; RV64ZBANOZBB: # %bb.0:
; RV64ZBANOZBB-NEXT: slli a0, a0, 48
-; RV64ZBANOZBB-NEXT: srai a0, a0, 48
-; RV64ZBANOZBB-NEXT: slli a0, a0, 23
-; RV64ZBANOZBB-NEXT: srli a0, a0, 32
+; RV64ZBANOZBB-NEXT: srai a0, a0, 57
+; RV64ZBANOZBB-NEXT: zext.w a0, a0
; RV64ZBANOZBB-NEXT: ret
;
; RV64ZBAZBB-LABEL: sext_ashr_zext_i16:
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 457d0380ca8a83..29e98736ad5df5 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -222,9 +222,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV32-LABEL: test_srem_pow2_setne:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a0, 26
-; RV32-NEXT: srai a1, a1, 26
-; RV32-NEXT: slli a1, a1, 21
-; RV32-NEXT: srli a1, a1, 30
+; RV32-NEXT: srai a1, a1, 31
+; RV32-NEXT: andi a1, a1, 3
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: andi a1, a1, 60
; RV32-NEXT: sub a0, a0, a1
@@ -235,9 +234,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV64-LABEL: test_srem_pow2_setne:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a0, 58
-; RV64-NEXT: srai a1, a1, 58
-; RV64-NEXT: slli a1, a1, 53
-; RV64-NEXT: srli a1, a1, 62
+; RV64-NEXT: srai a1, a1, 63
+; RV64-NEXT: andi a1, a1, 3
; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: andi a1, a1, 60
; RV64-NEXT: subw a0, a0, a1
@@ -248,9 +246,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV32M-LABEL: test_srem_pow2_setne:
; RV32M: # %bb.0:
; RV32M-NEXT: slli a1, a0, 26
-; RV32M-NEXT: srai a1, a1, 26
-; RV32M-NEXT: slli a1, a1, 21
-; RV32M-NEXT: srli a1, a1, 30
+; RV32M-NEXT: srai a1, a1, 31
+; RV32M-NEXT: andi a1, a1, 3
; RV32M-NEXT: add a1, a0, a1
; RV32M-NEXT: andi a1, a1, 60
; RV32M-NEXT: sub a0, a0, a1
@@ -261,9 +258,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV64M-LABEL: test_srem_pow2_setne:
; RV64M: # %bb.0:
; RV64M-NEXT: slli a1, a0, 58
-; RV64M-NEXT: srai a1, a1, 58
-; RV64M-NEXT: slli a1, a1, 53
-; RV64M-NEXT: srli a1, a1, 62
+; RV64M-NEXT: srai a1, a1, 63
+; RV64M-NEXT: andi a1, a1, 3
; RV64M-NEXT: add a1, a0, a1
; RV64M-NEXT: andi a1, a1, 60
; RV64M-NEXT: subw a0, a0, a1
@@ -274,9 +270,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV32MV-LABEL: test_srem_pow2_setne:
; RV32MV: # %bb.0:
; RV32MV-NEXT: slli a1, a0, 26
-; RV32MV-NEXT: srai a1, a1, 26
-; RV32MV-NEXT: slli a1, a1, 21
-; RV32MV-NEXT: srli a1, a1, 30
+; RV32MV-NEXT: srai a1, a1, 31
+; RV32MV-NEXT: andi a1, a1, 3
; RV32MV-NEXT: add a1, a0, a1
; RV32MV-NEXT: andi a1, a1, 60
; RV32MV-NEXT: sub a0, a0, a1
@@ -287,9 +282,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV64MV-LABEL: test_srem_pow2_setne:
; RV64MV: # %bb.0:
; RV64MV-NEXT: slli a1, a0, 58
-; RV64MV-NEXT: srai a1, a1, 58
-; RV64MV-NEXT: slli a1, a1, 53
-; RV64MV-NEXT: srli a1, a1, 62
+; RV64MV-NEXT: srai a1, a1, 63
+; RV64MV-NEXT: andi a1, a1, 3
; RV64MV-NEXT: add a1, a0, a1
; RV64MV-NEXT: andi a1, a1, 60
; RV64MV-NEXT: subw a0, a0, a1
diff --git a/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
index 58bafebd5b702f..e3d65a336978b3 100644
--- a/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
@@ -44,8 +44,9 @@ define i1 @test_srem_even(i4 %X) nounwind {
define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; CHECK-LABEL: test_srem_pow2_setne:
; CHECK: @ %bb.0:
-; CHECK-NEXT: sbfx r1, r0, #0, #6
-; CHECK-NEXT: ubfx r1, r1, #9, #2
+; CHECK-NEXT: lsls r1, r0, #26
+; CHECK-NEXT: movs r2, #3
+; CHECK-NEXT: and.w r1, r2, r1, asr #31
; CHECK-NEXT: add r1, r0
; CHECK-NEXT: and r1, r1, #60
; CHECK-NEXT: subs r0, r0, r1
diff --git a/llvm/test/CodeGen/X86/scmp.ll b/llvm/test/CodeGen/X86/scmp.ll
index 7d4bbb06534e6c..909dd1df42719e 100644
--- a/llvm/test/CodeGen/X86/scmp.ll
+++ b/llvm/test/CodeGen/X86/scmp.ll
@@ -1557,17 +1557,14 @@ define <7 x i117> @scmp_uncommon_vectors(<7 x i7> %x, <7 x i7> %y) nounwind {
; X64-NEXT: orq %rbx, %rdi
; X64-NEXT: movq %rdi, 24(%rax)
; X64-NEXT: shlq $9, %r14
-; X64-NEXT: shrq $44, %r10
; X64-NEXT: andl $511, %r10d # imm = 0x1FF
; X64-NEXT: orq %r14, %r10
; X64-NEXT: movq %r10, 72(%rax)
; X64-NEXT: shlq $20, %r9
-; X64-NEXT: shrq $33, %r11
; X64-NEXT: andl $1048575, %r11d # imm = 0xFFFFF
; X64-NEXT: orq %r9, %r11
; X64-NEXT: movq %r11, 56(%rax)
; X64-NEXT: shlq $31, %r8
-; X64-NEXT: shrq $22, %rcx
; X64-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
; X64-NEXT: orq %r8, %rcx
; X64-NEXT: movq %rcx, 40(%rax)
@@ -1618,9 +1615,15 @@ define <7 x i117> @scmp_uncommon_vectors(<7 x i7> %x, <7 x i7> %y) nounwind {
; X86-NEXT: addb %al, %al
; X86-NEXT: sarb %al
; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
+; X86-NEXT: addb %dh, %dh
+; X86-NEXT: sarb %dh
+; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
; X86-NEXT: addb %dl, %dl
; X86-NEXT: sarb %dl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addb %al, %al
+; X86-NEXT: sarb %al
; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
; X86-NEXT: addb %ah, %ah
; X86-NEXT: sarb %ah
@@ -1636,50 +1639,44 @@ define <7 x i117> @scmp_uncommon_vectors(<7 x i7> %x, <7 x i7> %y) nounwind {
; X86-NEXT: movb {{[0-9]+}}(%esp), %bh
; X86-NEXT: addb %bh, %bh
; X86-NEXT: sarb %bh
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: addb %al, %al
-; X86-NEXT: sarb %al
-; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
-; X86-NEXT: addb %dh, %dh
-; X86-NEXT: sarb %dh
-; X86-NEXT: cmpb %al, %dh
-; X86-NEXT: setl %al
-; X86-NEXT: setg %dh
-; X86-NEXT: subb %al, %dh
-; X86-NEXT: movsbl %dh, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: andl $2097151, %esi # imm = 0x1FFFFF
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: cmpb %bl, %bh
-; X86-NEXT: setl %al
-; X86-NEXT: setg %dh
-; X86-NEXT: subb %al, %dh
-; X86-NEXT: movsbl %dh, %esi
+; X86-NEXT: setl %bl
+; X86-NEXT: setg %bh
+; X86-NEXT: subb %bl, %bh
+; X86-NEXT: movsbl %bh, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sarl $31, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: andl $2097151, %esi # imm = 0x1FFFFF
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: cmpb %cl, %ch
+; X86-NEXT: setl %cl
+; X86-NEXT: setg %ch
+; X86-NEXT: subb %cl, %ch
+; X86-NEXT: movsbl %ch, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl $2097151, %ecx # imm = 0x1FFFFF
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmpb %al, %ah
; X86-NEXT: setl %al
; X86-NEXT: setg %cl
; X86-NEXT: subb %al, %cl
; X86-NEXT: movsbl %cl, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %ecx, (%ebp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %ecx, (%edi)
; X86-NEXT: sarl $31, %ecx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: andl $2097151, %esi # imm = 0x1FFFFF
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: cmpb %dl, %ah
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: andl $2097151, %eax # imm = 0x1FFFFF
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmpb %dh, %dl
; X86-NEXT: setl %al
; X86-NEXT: setg %dl
; X86-NEXT: subb %al, %dl
-; X86-NEXT: movsbl %dl, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sarl $31, %edi
+; X86-NEXT: movsbl %dl, %ebp
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sarl $31, %ebp
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: cmpb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
; X86-NEXT: setl %al
@@ -1701,91 +1698,81 @@ define <7 x i117> @scmp_uncommon_vectors(<7 x i7> %x, <7 x i7> %y) nounwind {
; X86-NEXT: setl %dl
; X86-NEXT: setg %dh
; X86-NEXT: subb %dl, %dh
-; X86-NEXT: movsbl %dh, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: movl %edx, 96(%ebp)
-; X86-NEXT: movl %edx, 92(%ebp)
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: movl %ebx, 80(%ebp)
-; X86-NEXT: movl %eax, 68(%ebp)
-; X86-NEXT: movl %eax, 64(%ebp)
-; X86-NEXT: movl %esi, 52(%ebp)
-; X86-NEXT: movl %esi, 48(%ebp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: movl %ebx, 36(%ebp)
-; X86-NEXT: movl %edi, 24(%ebp)
-; X86-NEXT: movl %edi, 20(%ebp)
-; X86-NEXT: movl %ecx, 8(%ebp)
-; X86-NEXT: movl %ecx, 4(%ebp)
-; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movsbl %dh, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sarl $31, %ebx
+; X86-NEXT: movl %ebx, 96(%edi)
+; X86-NEXT: movl %ebx, 92(%edi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl %edx, 80(%edi)
+; X86-NEXT: movl %eax, 68(%edi)
+; X86-NEXT: movl %eax, 64(%edi)
+; X86-NEXT: movl %esi, 52(%edi)
+; X86-NEXT: movl %esi, 48(%edi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl %edx, 36(%edi)
+; X86-NEXT: movl %ebp, 24(%edi)
+; X86-NEXT: movl %ebp, 20(%edi)
+; X86-NEXT: movl %ecx, 8(%edi)
+; X86-NEXT: movl %ecx, 4(%edi)
+; X86-NEXT: movl %ebx, %ecx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movw %cx, 100(%edi)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: shldl $30, %edx, %ecx
-; X86-NEXT: movl %ecx, 88(%ebp)
-; X86-NEXT: movl %ebp, %ebx
+; X86-NEXT: movl %ecx, 88(%edi)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shldl $9, %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X86-NEXT: shldl $9, %ebp, %ecx
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: movl %ecx, 76(%ebx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl $9, %edx, %ecx
+; X86-NEXT: movl %ecx, 76(%edi)
; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: shldl $20, %ebx, %ecx
-; X86-NEXT: movl %ecx, 60(%ebp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl $20, %edx, %ecx
+; X86-NEXT: movl %ecx, 60(%edi)
; X86-NEXT: movl %esi, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: shldl $31, %ebx, %ecx
-; X86-NEXT: movl %ecx, 44(%ebp)
-; X86-NEXT: movl %ebp, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl $31, %edx, %ecx
+; X86-NEXT: movl %ecx, 44(%edi)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shldl $10, %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X86-NEXT: shldl $10, %ebp, %ecx
-; X86-NEXT: movl %ecx, 32(%ebx)
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X86-NEXT: shldl $21, %ebp, %ecx
-; X86-NEXT: movl %ecx, 16(%ebx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: shrl $2, %ecx
-; X86-NEXT: movw %cx, 100(%ebx)
-; X86-NEXT: shll $21, %ebp
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X86-NEXT: movl %ebp, 12(%ebx)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: shll $30, %ecx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: movl %ecx, 84(%ebx)
+; X86-NEXT: shldl $10, %edx, %ecx
+; X86-NEXT: movl %ecx, 32(%edi)
+; X86-NEXT: movl %ebp, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl $21, %ebx, %ecx
+; X86-NEXT: movl %ecx, 16(%edi)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shll $9, %ecx
-; X86-NEXT: shrl $12, %eax
; X86-NEXT: andl $511, %eax # imm = 0x1FF
; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: movl %eax, 72(%ebx)
+; X86-NEXT: movl %eax, 72(%edi)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shll $20, %eax
-; X86-NEXT: shrl %esi
; X86-NEXT: andl $1048575, %esi # imm = 0xFFFFF
; X86-NEXT: orl %eax, %esi
-; X86-NEXT: movl %esi, 56(%ebx)
+; X86-NEXT: movl %esi, 56(%edi)
+; X86-NEXT: shll $10, %edx
+; X86-NEXT: andl $1023, %ebp # imm = 0x3FF
+; X86-NEXT: orl %edx, %ebp
+; X86-NEXT: movl %ebp, 28(%edi)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shll $31, %eax
+; X86-NEXT: shll $21, %eax
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: movl %eax, 40(%ebx)
+; X86-NEXT: movl %eax, 12(%edi)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shll $10, %eax
-; X86-NEXT: shrl $11, %edi
-; X86-NEXT: andl $1023, %edi # imm = 0x3FF
-; X86-NEXT: orl %eax, %edi
-; X86-NEXT: movl %edi, 28(%ebx)
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: shrl $18, %eax
; X86-NEXT: andl $7, %eax
-; X86-NEXT: movb %al, 102(%ebx)
-; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movb %al, 102(%edi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shll $30, %eax
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, 84(%edi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shll $31, %eax
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, 40(%edi)
+; X86-NEXT: movl %edi, %eax
; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index e7727a0ab6178c..8eb32cbe879c8c 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -572,9 +572,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X64-NEXT: movdqa %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm3, %rbx
-; X64-NEXT: movq %rbx, %r13
-; X64-NEXT: sarq $63, %r13
-; X64-NEXT: shldq $31, %rbx, %r13
+; X64-NEXT: movq %rbx, %r14
+; X64-NEXT: sarq $63, %r14
+; X64-NEXT: shldq $31, %rbx, %r14
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X64-NEXT: pxor %xmm0, %xmm0
; X64-NEXT: pcmpgtd %xmm1, %xmm0
@@ -582,94 +582,100 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm1, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r15
-; X64-NEXT: sarq $63, %r15
-; X64-NEXT: movq %rbx, %r12
-; X64-NEXT: shlq $31, %r12
-; X64-NEXT: movq %r12, %rdi
-; X64-NEXT: movq %r13, %rsi
-; X64-NEXT: movq %r15, %rcx
+; X64-NEXT: movq %rdx, %r12
+; X64-NEXT: sarq $63, %r12
+; X64-NEXT: movq %rbx, %r13
+; X64-NEXT: shlq $31, %r13
+; X64-NEXT: movq %r13, %rdi
+; X64-NEXT: movq %r14, %rsi
+; X64-NEXT: movq %r12, %rcx
; X64-NEXT: callq __divti3 at PLT
; X64-NEXT: movq %rax, %rbp
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r14
+; X64-NEXT: movq %rdx, %r15
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %rbp
-; X64-NEXT: sbbq $0, %r14
-; X64-NEXT: shrq $63, %rbx
-; X64-NEXT: xorl %r15d, %ebx
-; X64-NEXT: movq %r12, %rdi
-; X64-NEXT: movq %r13, %rsi
+; X64-NEXT: sbbq $0, %r15
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; X64-NEXT: movq %r15, %rcx
+; X64-NEXT: movq %rdx, %rax
+; X64-NEXT: shrq $63, %rax
+; X64-NEXT: shrq $63, %rbx
+; X64-NEXT: xorl %eax, %ebx
+; X64-NEXT: movq %r13, %rdi
+; X64-NEXT: movq %r14, %rsi
+; X64-NEXT: movq %r12, %rcx
; X64-NEXT: callq __modti3 at PLT
+; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: orq %rax, %rdx
-; X64-NEXT: setne %al
-; X64-NEXT: testb %bl, %al
-; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT: setne %cl
+; X64-NEXT: testl %ebx, %ecx
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: movl $4294967295, %edx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rdx, %rbp
-; X64-NEXT: movq %r14, %rax
+; X64-NEXT: movq %r15, %rax
; X64-NEXT: sbbq $0, %rax
-; X64-NEXT: cmovgeq %rcx, %r14
+; X64-NEXT: cmovgeq %rcx, %r15
; X64-NEXT: cmovgeq %rdx, %rbp
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rbp, %rcx
; X64-NEXT: movq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; X64-NEXT: movq $-1, %rax
-; X64-NEXT: sbbq %r14, %rax
+; X64-NEXT: sbbq %r15, %rax
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movq %rbp, %xmm0
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,2,3]
; X64-NEXT: movq %xmm0, %rbx
-; X64-NEXT: movq %rbx, %r13
-; X64-NEXT: sarq $63, %r13
-; X64-NEXT: shldq $31, %rbx, %r13
+; X64-NEXT: movq %rbx, %r14
+; X64-NEXT: sarq $63, %r14
+; X64-NEXT: shldq $31, %rbx, %r14
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,2,3]
; X64-NEXT: movq %xmm0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r15
-; X64-NEXT: sarq $63, %r15
-; X64-NEXT: movq %rbx, %r12
-; X64-NEXT: shlq $31, %r12
-; X64-NEXT: movq %r12, %rdi
-; X64-NEXT: movq %r13, %rsi
-; X64-NEXT: movq %r15, %rcx
+; X64-NEXT: movq %rdx, %r12
+; X64-NEXT: sarq $63, %r12
+; X64-NEXT: movq %rbx, %r13
+; X64-NEXT: shlq $31, %r13
+; X64-NEXT: movq %r13, %rdi
+; X64-NEXT: movq %r14, %rsi
+; X64-NEXT: movq %r12, %rcx
; X64-NEXT: callq __divti3 at PLT
; X64-NEXT: movq %rax, %rbp
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r14
+; X64-NEXT: movq %rdx, %r15
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %rbp
-; X64-NEXT: sbbq $0, %r14
-; X64-NEXT: shrq $63, %rbx
-; X64-NEXT: xorl %r15d, %ebx
-; X64-NEXT: movq %r12, %rdi
-; X64-NEXT: movq %r13, %rsi
+; X64-NEXT: sbbq $0, %r15
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; X64-NEXT: movq %r15, %rcx
+; X64-NEXT: movq %rdx, %rax
+; X64-NEXT: shrq $63, %rax
+; X64-NEXT: shrq $63, %rbx
+; X64-NEXT: xorl %eax, %ebx
+; X64-NEXT: movq %r13, %rdi
+; X64-NEXT: movq %r14, %rsi
+; X64-NEXT: movq %r12, %rcx
; X64-NEXT: callq __modti3 at PLT
+; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: orq %rax, %rdx
-; X64-NEXT: setne %al
-; X64-NEXT: testb %bl, %al
-; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT: setne %cl
+; X64-NEXT: testl %ebx, %ecx
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rcx, %rbp
-; X64-NEXT: movq %r14, %rax
+; X64-NEXT: movq %r15, %rax
; X64-NEXT: sbbq $0, %rax
; X64-NEXT: movl $0, %eax
-; X64-NEXT: cmovgeq %rax, %r14
+; X64-NEXT: cmovgeq %rax, %r15
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rbp, %rcx
; X64-NEXT: movq $-1, %rax
-; X64-NEXT: sbbq %r14, %rax
+; X64-NEXT: sbbq %r15, %rax
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movq %rbp, %xmm0
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
@@ -685,9 +691,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm0, %rbx
-; X64-NEXT: movq %rbx, %r13
-; X64-NEXT: sarq $63, %r13
-; X64-NEXT: shldq $31, %rbx, %r13
+; X64-NEXT: movq %rbx, %r14
+; X64-NEXT: sarq $63, %r14
+; X64-NEXT: shldq $31, %rbx, %r14
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; X64-NEXT: pxor %xmm1, %xmm1
; X64-NEXT: pcmpgtd %xmm0, %xmm1
@@ -695,92 +701,98 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r15
-; X64-NEXT: sarq $63, %r15
-; X64-NEXT: movq %rbx, %r12
-; X64-NEXT: shlq $31, %r12
-; X64-NEXT: movq %r12, %rdi
-; X64-NEXT: movq %r13, %rsi
-; X64-NEXT: movq %r15, %rcx
+; X64-NEXT: movq %rdx, %r12
+; X64-NEXT: sarq $63, %r12
+; X64-NEXT: movq %rbx, %r13
+; X64-NEXT: shlq $31, %r13
+; X64-NEXT: movq %r13, %rdi
+; X64-NEXT: movq %r14, %rsi
+; X64-NEXT: movq %r12, %rcx
; X64-NEXT: callq __divti3 at PLT
; X64-NEXT: movq %rax, %rbp
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r14
+; X64-NEXT: movq %rdx, %r15
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %rbp
-; X64-NEXT: sbbq $0, %r14
-; X64-NEXT: shrq $63, %rbx
-; X64-NEXT: xorl %r15d, %ebx
-; X64-NEXT: movq %r12, %rdi
-; X64-NEXT: movq %r13, %rsi
+; X64-NEXT: sbbq $0, %r15
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; X64-NEXT: movq %r15, %rcx
+; X64-NEXT: movq %rdx, %rax
+; X64-NEXT: shrq $63, %rax
+; X64-NEXT: shrq $63, %rbx
+; X64-NEXT: xorl %eax, %ebx
+; X64-NEXT: movq %r13, %rdi
+; X64-NEXT: movq %r14, %rsi
+; X64-NEXT: movq %r12, %rcx
; X64-NEXT: callq __modti3 at PLT
+; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: orq %rax, %rdx
-; X64-NEXT: setne %al
-; X64-NEXT: testb %bl, %al
-; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT: setne %cl
+; X64-NEXT: testl %ebx, %ecx
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rcx, %rbp
-; X64-NEXT: movq %r14, %rax
+; X64-NEXT: movq %r15, %rax
; X64-NEXT: sbbq $0, %rax
; X64-NEXT: movl $0, %eax
-; X64-NEXT: cmovgeq %rax, %r14
+; X64-NEXT: cmovgeq %rax, %r15
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rbp, %rcx
; X64-NEXT: movq $-1, %rax
-; X64-NEXT: sbbq %r14, %rax
+; X64-NEXT: sbbq %r15, %rax
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movq %rbp, %xmm0
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,2,3]
; X64-NEXT: movq %xmm0, %rbx
-; X64-NEXT: movq %rbx, %r13
-; X64-NEXT: sarq $63, %r13
-; X64-NEXT: shldq $31, %rbx, %r13
+; X64-NEXT: movq %rbx, %r14
+; X64-NEXT: sarq $63, %r14
+; X64-NEXT: shldq $31, %rbx, %r14
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,2,3]
; X64-NEXT: movq %xmm0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r15
-; X64-NEXT: sarq $63, %r15
-; X64-NEXT: movq %rbx, %r12
-; X64-NEXT: shlq $31, %r12
-; X64-NEXT: movq %r12, %rdi
-; X64-NEXT: movq %r13, %rsi
-; X64-NEXT: movq %r15, %rcx
+; X64-NEXT: movq %rdx, %r12
+; X64-NEXT: sarq $63, %r12
+; X64-NEXT: movq %rbx, %r13
+; X64-NEXT: shlq $31, %r13
+; X64-NEXT: movq %r13, %rdi
+; X64-NEXT: movq %r14, %rsi
+; X64-NEXT: movq %r12, %rcx
; X64-NEXT: callq __divti3 at PLT
; X64-NEXT: movq %rax, %rbp
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r14
+; X64-NEXT: movq %rdx, %r15
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %rbp
-; X64-NEXT: sbbq $0, %r14
-; X64-NEXT: shrq $63, %rbx
-; X64-NEXT: xorl %r15d, %ebx
-; X64-NEXT: movq %r12, %rdi
-; X64-NEXT: movq %r13, %rsi
+; X64-NEXT: sbbq $0, %r15
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; X64-NEXT: movq %r15, %rcx
+; X64-NEXT: movq %rdx, %rax
+; X64-NEXT: shrq $63, %rax
+; X64-NEXT: shrq $63, %rbx
+; X64-NEXT: xorl %eax, %ebx
+; X64-NEXT: movq %r13, %rdi
+; X64-NEXT: movq %r14, %rsi
+; X64-NEXT: movq %r12, %rcx
; X64-NEXT: callq __modti3 at PLT
+; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: orq %rax, %rdx
-; X64-NEXT: setne %al
-; X64-NEXT: testb %bl, %al
-; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NEXT: setne %cl
+; X64-NEXT: testl %ebx, %ecx
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rcx, %rbp
-; X64-NEXT: movq %r14, %rax
+; X64-NEXT: movq %r15, %rax
; X64-NEXT: sbbq $0, %rax
; X64-NEXT: movl $0, %eax
-; X64-NEXT: cmovgeq %rax, %r14
+; X64-NEXT: cmovgeq %rax, %r15
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rbp, %rax
-; X64-NEXT: sbbq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT: sbbq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; X64-NEXT: cmovgeq %rax, %rbp
; X64-NEXT: movq %rbp, %xmm1
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
diff --git a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll
index d644ed87c3c108..cc4bda81bef527 100644
--- a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll
@@ -82,8 +82,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shlb $2, %cl
-; X86-NEXT: sarb $5, %cl
-; X86-NEXT: shrb $4, %cl
+; X86-NEXT: sarb $7, %cl
; X86-NEXT: andb $3, %cl
; X86-NEXT: addb %al, %cl
; X86-NEXT: andb $60, %cl
@@ -96,8 +95,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal (,%rdi,4), %eax
-; X64-NEXT: sarb $5, %al
-; X64-NEXT: shrb $4, %al
+; X64-NEXT: sarb $7, %al
; X64-NEXT: andb $3, %al
; X64-NEXT: addb %dil, %al
; X64-NEXT: andb $60, %al
>From 8f10bbf6396163e4d43a29daeb5b39cfe1e96731 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Fri, 2 Aug 2024 13:59:27 -0700
Subject: [PATCH 2/4] fixup! shr -> srl
---
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index ea1102a14cc593..cced97ac3e6aa1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1958,7 +1958,7 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
- // If this is (shr (sra X, C1), ShAmt), see if we can combine this into a
+ // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
// single sra. We can do this if the top bits are never demanded.
if (Op0.getOpcode() == ISD::SRA) {
if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
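As a quick sanity check of the comment above, here is a standalone C++ sketch (not part of the patch) showing that srl(sra(X, C1), ShAmt) and sra(X, C1 + ShAmt) agree on every bit below the top ShAmt bits, which is exactly the precondition the fold requires. The 32-bit width and the shift amounts C1 = 5 and ShAmt = 4 are illustrative assumptions only; the sketch also assumes signed right shift is arithmetic, which C++20 guarantees and the targets touched by this PR provide.

// Standalone sketch, not part of the patch: verifies the equivalence the
// comment above relies on. BitWidth, C1 and ShAmt are illustrative choices.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  constexpr unsigned BitWidth = 32;
  constexpr unsigned C1 = 5;    // inner sra amount (assumed for illustration)
  constexpr unsigned ShAmt = 4; // outer srl amount (assumed for illustration)
  static_assert(C1 + ShAmt < BitWidth, "combined shift stays in range here");
  // Only the low BitWidth - ShAmt bits are demanded; the top ShAmt bits may
  // differ between the two forms and are masked off before comparing.
  constexpr uint32_t DemandedMask = UINT32_MAX >> ShAmt;

  for (int64_t i = INT32_MIN; i <= INT32_MAX; i += 9973) {
    const int32_t X = static_cast<int32_t>(i);
    // srl(sra(X, C1), ShAmt): arithmetic shift, then logical shift.
    const uint32_t TwoShifts = static_cast<uint32_t>(X >> C1) >> ShAmt;
    // sra(X, C1 + ShAmt): the single combined arithmetic shift.
    const uint32_t OneShift = static_cast<uint32_t>(X >> (C1 + ShAmt));
    assert((TwoShifts & DemandedMask) == (OneShift & DemandedMask));
  }
  std::puts("equal on all demanded bits");
}

The X86 srem-seteq-illegal-types.ll change earlier in this series, where sarb $5 followed by shrb $4 becomes a single sarb $7 under an andb $3 mask, is the same identity at 8 bits, with the combined amount of 9 capped at 7, the bit width minus one.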
>From 2f7137beda811bc3f0d61779b9efb46f539d6330 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Fri, 2 Aug 2024 16:27:50 -0700
Subject: [PATCH 3/4] fixup! Add one use check
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 2 +-
.../CodeGen/AMDGPU/div-rem-by-constant-64.ll | 40 ++--
llvm/test/CodeGen/AMDGPU/div_i128.ll | 68 +++---
llvm/test/CodeGen/AMDGPU/function-returns.ll | 12 +-
llvm/test/CodeGen/AMDGPU/itofp.i128.ll | 8 +-
.../AMDGPU/load-range-metadata-sign-bits.ll | 5 +-
llvm/test/CodeGen/AMDGPU/sdiv64.ll | 14 +-
llvm/test/CodeGen/AMDGPU/shift-i128.ll | 4 +-
llvm/test/CodeGen/NVPTX/idioms.ll | 4 +-
llvm/test/CodeGen/X86/scmp.ll | 181 ++++++++--------
llvm/test/CodeGen/X86/sdiv_fix_sat.ll | 196 ++++++++----------
11 files changed, 262 insertions(+), 272 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index cced97ac3e6aa1..c2d2c4d263e3a2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1960,7 +1960,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
// single sra. We can do this if the top bits are never demanded.
- if (Op0.getOpcode() == ISD::SRA) {
+ if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
if (std::optional<uint64_t> InnerSA =
TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
diff --git a/llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll b/llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll
index dd888433e32e8d..113c6d01c99a16 100644
--- a/llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll
+++ b/llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll
@@ -661,10 +661,9 @@ define noundef i64 @sdiv64_2(i64 noundef %i) {
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 31, v1
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2
-; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
-; GFX9-NEXT: v_alignbit_b32 v0, v3, v2, 1
-; GFX9-NEXT: v_ashrrev_i64 v[1:2], 33, v[2:3]
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: sdiv64_2:
@@ -672,20 +671,17 @@ define noundef i64 @sdiv64_2(i64 noundef %i) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, v[2:3]
-; GFX942-NEXT: v_alignbit_b32 v0, v3, v2, 1
-; GFX942-NEXT: v_ashrrev_i64 v[2:3], 33, v[2:3]
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: sdiv64_2:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 31, v1
-; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
-; GFX1030-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo
-; GFX1030-NEXT: v_alignbit_b32 v0, v3, v2, 1
-; GFX1030-NEXT: v_ashrrev_i64 v[1:2], 33, v[2:3]
+; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1030-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1030-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = sdiv i64 %i, 2
@@ -792,10 +788,9 @@ define noundef i64 @sdiv64_64(i64 noundef %i) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 26, v2
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2
-; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
-; GFX9-NEXT: v_alignbit_b32 v0, v3, v2, 6
-; GFX9-NEXT: v_ashrrev_i64 v[1:2], 38, v[2:3]
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-NEXT: v_ashrrev_i64 v[0:1], 6, v[0:1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: sdiv64_64:
@@ -804,10 +799,8 @@ define noundef i64 @sdiv64_64(i64 noundef %i) {
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 26, v2
; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, v[2:3]
-; GFX942-NEXT: v_alignbit_b32 v0, v3, v2, 6
-; GFX942-NEXT: v_ashrrev_i64 v[2:3], 38, v[2:3]
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942-NEXT: v_ashrrev_i64 v[0:1], 6, v[0:1]
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: sdiv64_64:
@@ -815,10 +808,9 @@ define noundef i64 @sdiv64_64(i64 noundef %i) {
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 26, v2
-; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
-; GFX1030-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo
-; GFX1030-NEXT: v_alignbit_b32 v0, v3, v2, 6
-; GFX1030-NEXT: v_ashrrev_i64 v[1:2], 38, v[2:3]
+; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX1030-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; GFX1030-NEXT: v_ashrrev_i64 v[0:1], 6, v[0:1]
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = sdiv i64 %i, 64
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index 071aae98c96853..fea1303d0a2b76 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -4448,14 +4448,13 @@ define i128 @v_sdiv_i128_v_pow2k(i128 %lhs) {
; GFX9-NEXT: v_mov_b32_e32 v5, v4
; GFX9-NEXT: v_lshrrev_b64 v[4:5], 31, v[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v4
-; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, v1, v5, vcc
-; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v2, vcc
-; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v3, vcc
-; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[4:5]
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v6
-; GFX9-NEXT: v_or_b32_e32 v0, v2, v0
-; GFX9-NEXT: v_ashrrev_i64 v[2:3], 33, v[4:5]
-; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v5
+; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v1, v5, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
+; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[2:3]
+; GFX9-NEXT: v_lshrrev_b32_e32 v4, 1, v4
+; GFX9-NEXT: v_ashrrev_i64 v[2:3], 33, v[2:3]
+; GFX9-NEXT: v_or_b32_e32 v0, v4, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-O0-LABEL: v_sdiv_i128_v_pow2k:
@@ -4482,40 +4481,41 @@ define i128 @v_sdiv_i128_v_pow2k(i128 %lhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
; GFX9-O0-NEXT: s_mov_b32 s4, 63
; GFX9-O0-NEXT: v_ashrrev_i64 v[4:5], s4, v[4:5]
-; GFX9-O0-NEXT: s_mov_b32 s6, 31
-; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], s6, v[4:5]
+; GFX9-O0-NEXT: s_mov_b32 s5, 31
+; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], s5, v[4:5]
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v7
; GFX9-O0-NEXT: s_mov_b64 s[8:9], 0
-; GFX9-O0-NEXT: s_mov_b32 s7, s8
-; GFX9-O0-NEXT: s_mov_b32 s5, s9
+; GFX9-O0-NEXT: s_mov_b32 s6, s8
+; GFX9-O0-NEXT: s_mov_b32 s4, s9
; GFX9-O0-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v5, vcc, v3, v4, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, s7
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v3, vcc, v2, v3, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v4, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s6
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v5, vcc, v2, v4, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, s4
; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
-; GFX9-O0-NEXT: ; implicit-def: $sgpr5
-; GFX9-O0-NEXT: ; implicit-def: $sgpr5
-; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT: ; implicit-def: $sgpr5
-; GFX9-O0-NEXT: ; implicit-def: $sgpr5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr4
+; GFX9-O0-NEXT: ; implicit-def: $sgpr4
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
+; GFX9-O0-NEXT: ; implicit-def: $sgpr4
+; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
-; GFX9-O0-NEXT: s_mov_b32 s5, 33
-; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s5, v[0:1]
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
+; GFX9-O0-NEXT: s_mov_b32 s4, 33
+; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
-; GFX9-O0-NEXT: v_lshl_or_b32 v0, v2, s6, v0
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v3
-; GFX9-O0-NEXT: v_ashrrev_i64 v[5:6], s5, v[5:6]
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4
-; GFX9-O0-NEXT: s_mov_b32 s5, 1
-; GFX9-O0-NEXT: v_alignbit_b32 v1, v1, v2, s5
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
+; GFX9-O0-NEXT: v_lshl_or_b32 v0, v2, s5, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v6
; GFX9-O0-NEXT: v_ashrrev_i64 v[3:4], s4, v[3:4]
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6
+; GFX9-O0-NEXT: s_mov_b32 s4, 1
+; GFX9-O0-NEXT: v_alignbit_b32 v1, v1, v2, s4
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
+; GFX9-O0-NEXT: s_mov_b32 s4, 32
+; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s4, v[3:4]
; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
; GFX9-O0-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/function-returns.ll b/llvm/test/CodeGen/AMDGPU/function-returns.ll
index 925987df8c8dea..401cbce00ac9a8 100644
--- a/llvm/test/CodeGen/AMDGPU/function-returns.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-returns.ll
@@ -334,23 +334,23 @@ define signext i63 @i63_signext_func_void(i63 %val) #0 {
; CI-LABEL: i63_signext_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NEXT: v_lshl_b64 v[1:2], v[0:1], 1
-; CI-NEXT: v_ashr_i64 v[1:2], v[1:2], 33
+; CI-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
+; CI-NEXT: v_ashr_i64 v[0:1], v[0:1], 1
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: i63_signext_func_void:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT: v_lshlrev_b64 v[1:2], 1, v[0:1]
-; GFX89-NEXT: v_ashrrev_i64 v[1:2], 33, v[1:2]
+; GFX89-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
+; GFX89-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i63_signext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshlrev_b64 v[1:2], 1, v[0:1]
+; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_ashrrev_i64 v[1:2], 33, v[1:2]
+; GFX11-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
; GFX11-NEXT: s_setpc_b64 s[30:31]
ret i63 %val
}
diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
index 1fe8c0aabb6b6d..c5198cdb421a50 100644
--- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
@@ -631,15 +631,17 @@ define double @sitofp_i128_to_f64(i128 %x) {
; SDAG-NEXT: v_and_or_b32 v0, v0, 1, v4
; SDAG-NEXT: v_add_co_u32_e32 v4, vcc, 1, v0
; SDAG-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], 2, v[4:5]
; SDAG-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
+; SDAG-NEXT: v_lshrrev_b64 v[0:1], 2, v[4:5]
+; SDAG-NEXT: v_lshlrev_b32_e32 v7, 30, v6
+; SDAG-NEXT: v_or_b32_e32 v10, v1, v7
; SDAG-NEXT: v_and_b32_e32 v1, 0x800000, v5
; SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
-; SDAG-NEXT: v_alignbit_b32 v10, v6, v5, 2
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: ; %bb.11: ; %itofp-if-then20
; SDAG-NEXT: v_lshrrev_b64 v[0:1], 3, v[4:5]
-; SDAG-NEXT: v_alignbit_b32 v10, v6, v5, 3
+; SDAG-NEXT: v_lshlrev_b32_e32 v2, 29, v6
+; SDAG-NEXT: v_or_b32_e32 v10, v1, v2
; SDAG-NEXT: v_mov_b32_e32 v2, v8
; SDAG-NEXT: ; %bb.12: ; %Flow
; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll b/llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll
index 49b569bf2154e6..5fc1a87e71a1a6 100644
--- a/llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll
@@ -110,11 +110,10 @@ define i64 @range_metadata_sext_i8_signed_range_i64(ptr addrspace(1) %ptr) {
; SDAG-LABEL: range_metadata_sext_i8_signed_range_i64:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT: global_load_dwordx2 v[2:3], v[0:1], off glc
+; SDAG-NEXT: global_load_dwordx2 v[0:1], v[0:1], off glc
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: v_lshlrev_b32_e32 v1, 23, v2
+; SDAG-NEXT: v_lshlrev_b32_e32 v1, 23, v0
; SDAG-NEXT: v_ashrrev_i64 v[0:1], 55, v[0:1]
-; SDAG-NEXT: v_bfe_i32 v1, v2, 8, 1
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: range_metadata_sext_i8_signed_range_i64:
diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
index 8f9417f875e8e7..f4776747f16ac1 100644
--- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
@@ -1759,10 +1759,9 @@ define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) {
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GCN-NEXT: v_lshrrev_b32_e32 v2, 17, v2
-; GCN-NEXT: v_add_i32_e32 v2, vcc, v0, v2
-; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc
-; GCN-NEXT: v_alignbit_b32 v0, v3, v2, 15
-; GCN-NEXT: v_ashr_i64 v[1:2], v[2:3], 47
+; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN-NEXT: v_ashr_i64 v[0:1], v[0:1], 15
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GCN-IR-LABEL: v_test_sdiv_pow2_k_den_i64:
@@ -2065,10 +2064,9 @@ define i64 @v_test_sdiv24_pow2_k_den_i64(i64 %x) {
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_ashr_i64 v[0:1], v[0:1], 40
; GCN-NEXT: v_lshrrev_b32_e32 v2, 17, v1
-; GCN-NEXT: v_add_i32_e32 v2, vcc, v0, v2
-; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc
-; GCN-NEXT: v_alignbit_b32 v0, v3, v2, 15
-; GCN-NEXT: v_ashr_i64 v[1:2], v[2:3], 47
+; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GCN-NEXT: v_ashr_i64 v[0:1], v[0:1], 15
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GCN-IR-LABEL: v_test_sdiv24_pow2_k_den_i64:
diff --git a/llvm/test/CodeGen/AMDGPU/shift-i128.ll b/llvm/test/CodeGen/AMDGPU/shift-i128.ll
index d603b413414021..ebc916b5c889b5 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-i128.ll
@@ -113,10 +113,8 @@ define i128 @v_ashr_i128_vk(i128 %lhs) {
; GCN-NEXT: v_mov_b32_e32 v4, v1
; GCN-NEXT: v_lshl_b64 v[0:1], v[2:3], 31
; GCN-NEXT: v_lshrrev_b32_e32 v4, 1, v4
+; GCN-NEXT: v_ashr_i64 v[2:3], v[2:3], 33
; GCN-NEXT: v_or_b32_e32 v0, v4, v0
-; GCN-NEXT: v_ashr_i64 v[4:5], v[2:3], 33
-; GCN-NEXT: v_ashrrev_i32_e32 v3, 31, v3
-; GCN-NEXT: v_mov_b32_e32 v2, v4
; GCN-NEXT: s_setpc_b64 s[30:31]
%shl = ashr i128 %lhs, 33
ret i128 %shl
diff --git a/llvm/test/CodeGen/NVPTX/idioms.ll b/llvm/test/CodeGen/NVPTX/idioms.ll
index 7123b49430e2b7..e8fe47c303f92d 100644
--- a/llvm/test/CodeGen/NVPTX/idioms.ll
+++ b/llvm/test/CodeGen/NVPTX/idioms.ll
@@ -113,9 +113,9 @@ define %struct.S16 @i32_to_2xi16_shr(i32 noundef %i){
%h32 = ashr i32 %i1, 16
%h = trunc i32 %h32 to i16
; CHECK: ld.param.u32 %[[R32:r[0-9]+]], [i32_to_2xi16_shr_param_0];
-; CHECK: shr.s32 %[[R32H:r[0-9]+]], %[[R32]], 31;
-; CHECK: cvt.u16.u32 %rs{{[0-9+]}}, %[[R32H]]
+; CHECK: shr.s32 %[[R32H:r[0-9]+]], %[[R32]], 16;
; CHECK-DAG mov.b32 {tmp, %rs{{[0-9+]}}}, %[[R32]];
+; CHECK-DAG mov.b32 {tmp, %rs{{[0-9+]}}}, %[[R32H]];
%s0 = insertvalue %struct.S16 poison, i16 %l, 0
%s1 = insertvalue %struct.S16 %s0, i16 %h, 1
ret %struct.S16 %s1
diff --git a/llvm/test/CodeGen/X86/scmp.ll b/llvm/test/CodeGen/X86/scmp.ll
index 909dd1df42719e..7d4bbb06534e6c 100644
--- a/llvm/test/CodeGen/X86/scmp.ll
+++ b/llvm/test/CodeGen/X86/scmp.ll
@@ -1557,14 +1557,17 @@ define <7 x i117> @scmp_uncommon_vectors(<7 x i7> %x, <7 x i7> %y) nounwind {
; X64-NEXT: orq %rbx, %rdi
; X64-NEXT: movq %rdi, 24(%rax)
; X64-NEXT: shlq $9, %r14
+; X64-NEXT: shrq $44, %r10
; X64-NEXT: andl $511, %r10d # imm = 0x1FF
; X64-NEXT: orq %r14, %r10
; X64-NEXT: movq %r10, 72(%rax)
; X64-NEXT: shlq $20, %r9
+; X64-NEXT: shrq $33, %r11
; X64-NEXT: andl $1048575, %r11d # imm = 0xFFFFF
; X64-NEXT: orq %r9, %r11
; X64-NEXT: movq %r11, 56(%rax)
; X64-NEXT: shlq $31, %r8
+; X64-NEXT: shrq $22, %rcx
; X64-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
; X64-NEXT: orq %r8, %rcx
; X64-NEXT: movq %rcx, 40(%rax)
@@ -1615,15 +1618,9 @@ define <7 x i117> @scmp_uncommon_vectors(<7 x i7> %x, <7 x i7> %y) nounwind {
; X86-NEXT: addb %al, %al
; X86-NEXT: sarb %al
; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
-; X86-NEXT: addb %dh, %dh
-; X86-NEXT: sarb %dh
-; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: addb %dl, %dl
; X86-NEXT: sarb %dl
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: addb %al, %al
-; X86-NEXT: sarb %al
; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
; X86-NEXT: addb %ah, %ah
; X86-NEXT: sarb %ah
@@ -1639,44 +1636,50 @@ define <7 x i117> @scmp_uncommon_vectors(<7 x i7> %x, <7 x i7> %y) nounwind {
; X86-NEXT: movb {{[0-9]+}}(%esp), %bh
; X86-NEXT: addb %bh, %bh
; X86-NEXT: sarb %bh
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: addb %al, %al
+; X86-NEXT: sarb %al
+; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
+; X86-NEXT: addb %dh, %dh
+; X86-NEXT: sarb %dh
+; X86-NEXT: cmpb %al, %dh
+; X86-NEXT: setl %al
+; X86-NEXT: setg %dh
+; X86-NEXT: subb %al, %dh
+; X86-NEXT: movsbl %dh, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl $2097151, %esi # imm = 0x1FFFFF
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: cmpb %bl, %bh
-; X86-NEXT: setl %bl
-; X86-NEXT: setg %bh
-; X86-NEXT: subb %bl, %bh
-; X86-NEXT: movsbl %bh, %esi
+; X86-NEXT: setl %al
+; X86-NEXT: setg %dh
+; X86-NEXT: subb %al, %dh
+; X86-NEXT: movsbl %dh, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sarl $31, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: andl $2097151, %esi # imm = 0x1FFFFF
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: cmpb %cl, %ch
-; X86-NEXT: setl %cl
-; X86-NEXT: setg %ch
-; X86-NEXT: subb %cl, %ch
-; X86-NEXT: movsbl %ch, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sarl $31, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: andl $2097151, %ecx # imm = 0x1FFFFF
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: cmpb %al, %ah
; X86-NEXT: setl %al
; X86-NEXT: setg %cl
; X86-NEXT: subb %al, %cl
; X86-NEXT: movsbl %cl, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ecx, (%edi)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %ecx, (%ebp)
; X86-NEXT: sarl $31, %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: andl $2097151, %eax # imm = 0x1FFFFF
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: cmpb %dh, %dl
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: andl $2097151, %esi # imm = 0x1FFFFF
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmpb %dl, %ah
; X86-NEXT: setl %al
; X86-NEXT: setg %dl
; X86-NEXT: subb %al, %dl
-; X86-NEXT: movsbl %dl, %ebp
-; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sarl $31, %ebp
+; X86-NEXT: movsbl %dl, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sarl $31, %edi
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: cmpb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
; X86-NEXT: setl %al
@@ -1698,81 +1701,91 @@ define <7 x i117> @scmp_uncommon_vectors(<7 x i7> %x, <7 x i7> %y) nounwind {
; X86-NEXT: setl %dl
; X86-NEXT: setg %dh
; X86-NEXT: subb %dl, %dh
-; X86-NEXT: movsbl %dh, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: movl %ebx, 96(%edi)
-; X86-NEXT: movl %ebx, 92(%edi)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movl %edx, 80(%edi)
-; X86-NEXT: movl %eax, 68(%edi)
-; X86-NEXT: movl %eax, 64(%edi)
-; X86-NEXT: movl %esi, 52(%edi)
-; X86-NEXT: movl %esi, 48(%edi)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movl %edx, 36(%edi)
-; X86-NEXT: movl %ebp, 24(%edi)
-; X86-NEXT: movl %ebp, 20(%edi)
-; X86-NEXT: movl %ecx, 8(%edi)
-; X86-NEXT: movl %ecx, 4(%edi)
-; X86-NEXT: movl %ebx, %ecx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movw %cx, 100(%edi)
+; X86-NEXT: movsbl %dh, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movl %edx, 96(%ebp)
+; X86-NEXT: movl %edx, 92(%ebp)
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl %ebx, 80(%ebp)
+; X86-NEXT: movl %eax, 68(%ebp)
+; X86-NEXT: movl %eax, 64(%ebp)
+; X86-NEXT: movl %esi, 52(%ebp)
+; X86-NEXT: movl %esi, 48(%ebp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl %ebx, 36(%ebp)
+; X86-NEXT: movl %edi, 24(%ebp)
+; X86-NEXT: movl %edi, 20(%ebp)
+; X86-NEXT: movl %ecx, 8(%ebp)
+; X86-NEXT: movl %ecx, 4(%ebp)
+; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: shldl $30, %edx, %ecx
-; X86-NEXT: movl %ecx, 88(%edi)
+; X86-NEXT: movl %ecx, 88(%ebp)
+; X86-NEXT: movl %ebp, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shldl $9, %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: shldl $9, %edx, %ecx
-; X86-NEXT: movl %ecx, 76(%edi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT: shldl $9, %ebp, %ecx
+; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: movl %ecx, 76(%ebx)
; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: shldl $20, %edx, %ecx
-; X86-NEXT: movl %ecx, 60(%edi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl $20, %ebx, %ecx
+; X86-NEXT: movl %ecx, 60(%ebp)
; X86-NEXT: movl %esi, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: shldl $31, %edx, %ecx
-; X86-NEXT: movl %ecx, 44(%edi)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl $31, %ebx, %ecx
+; X86-NEXT: movl %ecx, 44(%ebp)
+; X86-NEXT: movl %ebp, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shldl $10, %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT: shldl $10, %ebp, %ecx
+; X86-NEXT: movl %ecx, 32(%ebx)
+; X86-NEXT: movl %edi, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT: shldl $21, %ebp, %ecx
+; X86-NEXT: movl %ecx, 16(%ebx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: shldl $10, %edx, %ecx
-; X86-NEXT: movl %ecx, 32(%edi)
-; X86-NEXT: movl %ebp, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: shldl $21, %ebx, %ecx
-; X86-NEXT: movl %ecx, 16(%edi)
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: shrl $2, %ecx
+; X86-NEXT: movw %cx, 100(%ebx)
+; X86-NEXT: shll $21, %ebp
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT: movl %ebp, 12(%ebx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: shll $30, %ecx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, 84(%ebx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shll $9, %ecx
+; X86-NEXT: shrl $12, %eax
; X86-NEXT: andl $511, %eax # imm = 0x1FF
; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: movl %eax, 72(%edi)
+; X86-NEXT: movl %eax, 72(%ebx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shll $20, %eax
+; X86-NEXT: shrl %esi
; X86-NEXT: andl $1048575, %esi # imm = 0xFFFFF
; X86-NEXT: orl %eax, %esi
-; X86-NEXT: movl %esi, 56(%edi)
-; X86-NEXT: shll $10, %edx
-; X86-NEXT: andl $1023, %ebp # imm = 0x3FF
-; X86-NEXT: orl %edx, %ebp
-; X86-NEXT: movl %ebp, 28(%edi)
+; X86-NEXT: movl %esi, 56(%ebx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shll $21, %eax
+; X86-NEXT: shll $31, %eax
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: movl %eax, 12(%edi)
+; X86-NEXT: movl %eax, 40(%ebx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shll $10, %eax
+; X86-NEXT: shrl $11, %edi
+; X86-NEXT: andl $1023, %edi # imm = 0x3FF
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: movl %edi, 28(%ebx)
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: shrl $18, %eax
; X86-NEXT: andl $7, %eax
-; X86-NEXT: movb %al, 102(%edi)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shll $30, %eax
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: movl %eax, 84(%edi)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shll $31, %eax
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: movl %eax, 40(%edi)
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movb %al, 102(%ebx)
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index 8eb32cbe879c8c..e7727a0ab6178c 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -572,9 +572,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X64-NEXT: movdqa %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm3, %rbx
-; X64-NEXT: movq %rbx, %r14
-; X64-NEXT: sarq $63, %r14
-; X64-NEXT: shldq $31, %rbx, %r14
+; X64-NEXT: movq %rbx, %r13
+; X64-NEXT: sarq $63, %r13
+; X64-NEXT: shldq $31, %rbx, %r13
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X64-NEXT: pxor %xmm0, %xmm0
; X64-NEXT: pcmpgtd %xmm1, %xmm0
@@ -582,100 +582,94 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm1, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r12
-; X64-NEXT: sarq $63, %r12
-; X64-NEXT: movq %rbx, %r13
-; X64-NEXT: shlq $31, %r13
-; X64-NEXT: movq %r13, %rdi
-; X64-NEXT: movq %r14, %rsi
-; X64-NEXT: movq %r12, %rcx
+; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: sarq $63, %r15
+; X64-NEXT: movq %rbx, %r12
+; X64-NEXT: shlq $31, %r12
+; X64-NEXT: movq %r12, %rdi
+; X64-NEXT: movq %r13, %rsi
+; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __divti3 at PLT
; X64-NEXT: movq %rax, %rbp
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %rbp
-; X64-NEXT: sbbq $0, %r15
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; X64-NEXT: movq %rdx, %rax
-; X64-NEXT: shrq $63, %rax
+; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
-; X64-NEXT: xorl %eax, %ebx
-; X64-NEXT: movq %r13, %rdi
-; X64-NEXT: movq %r14, %rsi
-; X64-NEXT: movq %r12, %rcx
+; X64-NEXT: xorl %r15d, %ebx
+; X64-NEXT: movq %r12, %rdi
+; X64-NEXT: movq %r13, %rsi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __modti3 at PLT
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: orq %rax, %rdx
-; X64-NEXT: setne %cl
-; X64-NEXT: testl %ebx, %ecx
-; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT: setne %al
+; X64-NEXT: testb %bl, %al
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: movl $4294967295, %edx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rdx, %rbp
-; X64-NEXT: movq %r15, %rax
+; X64-NEXT: movq %r14, %rax
; X64-NEXT: sbbq $0, %rax
-; X64-NEXT: cmovgeq %rcx, %r15
+; X64-NEXT: cmovgeq %rcx, %r14
; X64-NEXT: cmovgeq %rdx, %rbp
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rbp, %rcx
; X64-NEXT: movq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; X64-NEXT: movq $-1, %rax
-; X64-NEXT: sbbq %r15, %rax
+; X64-NEXT: sbbq %r14, %rax
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movq %rbp, %xmm0
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,2,3]
; X64-NEXT: movq %xmm0, %rbx
-; X64-NEXT: movq %rbx, %r14
-; X64-NEXT: sarq $63, %r14
-; X64-NEXT: shldq $31, %rbx, %r14
+; X64-NEXT: movq %rbx, %r13
+; X64-NEXT: sarq $63, %r13
+; X64-NEXT: shldq $31, %rbx, %r13
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,2,3]
; X64-NEXT: movq %xmm0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r12
-; X64-NEXT: sarq $63, %r12
-; X64-NEXT: movq %rbx, %r13
-; X64-NEXT: shlq $31, %r13
-; X64-NEXT: movq %r13, %rdi
-; X64-NEXT: movq %r14, %rsi
-; X64-NEXT: movq %r12, %rcx
+; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: sarq $63, %r15
+; X64-NEXT: movq %rbx, %r12
+; X64-NEXT: shlq $31, %r12
+; X64-NEXT: movq %r12, %rdi
+; X64-NEXT: movq %r13, %rsi
+; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __divti3 at PLT
; X64-NEXT: movq %rax, %rbp
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %rbp
-; X64-NEXT: sbbq $0, %r15
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; X64-NEXT: movq %rdx, %rax
-; X64-NEXT: shrq $63, %rax
+; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
-; X64-NEXT: xorl %eax, %ebx
-; X64-NEXT: movq %r13, %rdi
-; X64-NEXT: movq %r14, %rsi
-; X64-NEXT: movq %r12, %rcx
+; X64-NEXT: xorl %r15d, %ebx
+; X64-NEXT: movq %r12, %rdi
+; X64-NEXT: movq %r13, %rsi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __modti3 at PLT
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: orq %rax, %rdx
-; X64-NEXT: setne %cl
-; X64-NEXT: testl %ebx, %ecx
-; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT: setne %al
+; X64-NEXT: testb %bl, %al
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rcx, %rbp
-; X64-NEXT: movq %r15, %rax
+; X64-NEXT: movq %r14, %rax
; X64-NEXT: sbbq $0, %rax
; X64-NEXT: movl $0, %eax
-; X64-NEXT: cmovgeq %rax, %r15
+; X64-NEXT: cmovgeq %rax, %r14
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rbp, %rcx
; X64-NEXT: movq $-1, %rax
-; X64-NEXT: sbbq %r15, %rax
+; X64-NEXT: sbbq %r14, %rax
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movq %rbp, %xmm0
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
@@ -691,9 +685,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm0, %rbx
-; X64-NEXT: movq %rbx, %r14
-; X64-NEXT: sarq $63, %r14
-; X64-NEXT: shldq $31, %rbx, %r14
+; X64-NEXT: movq %rbx, %r13
+; X64-NEXT: sarq $63, %r13
+; X64-NEXT: shldq $31, %rbx, %r13
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; X64-NEXT: pxor %xmm1, %xmm1
; X64-NEXT: pcmpgtd %xmm0, %xmm1
@@ -701,98 +695,92 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r12
-; X64-NEXT: sarq $63, %r12
-; X64-NEXT: movq %rbx, %r13
-; X64-NEXT: shlq $31, %r13
-; X64-NEXT: movq %r13, %rdi
-; X64-NEXT: movq %r14, %rsi
-; X64-NEXT: movq %r12, %rcx
+; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: sarq $63, %r15
+; X64-NEXT: movq %rbx, %r12
+; X64-NEXT: shlq $31, %r12
+; X64-NEXT: movq %r12, %rdi
+; X64-NEXT: movq %r13, %rsi
+; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __divti3 at PLT
; X64-NEXT: movq %rax, %rbp
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %rbp
-; X64-NEXT: sbbq $0, %r15
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; X64-NEXT: movq %rdx, %rax
-; X64-NEXT: shrq $63, %rax
+; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
-; X64-NEXT: xorl %eax, %ebx
-; X64-NEXT: movq %r13, %rdi
-; X64-NEXT: movq %r14, %rsi
-; X64-NEXT: movq %r12, %rcx
+; X64-NEXT: xorl %r15d, %ebx
+; X64-NEXT: movq %r12, %rdi
+; X64-NEXT: movq %r13, %rsi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __modti3 at PLT
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: orq %rax, %rdx
-; X64-NEXT: setne %cl
-; X64-NEXT: testl %ebx, %ecx
-; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT: setne %al
+; X64-NEXT: testb %bl, %al
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rcx, %rbp
-; X64-NEXT: movq %r15, %rax
+; X64-NEXT: movq %r14, %rax
; X64-NEXT: sbbq $0, %rax
; X64-NEXT: movl $0, %eax
-; X64-NEXT: cmovgeq %rax, %r15
+; X64-NEXT: cmovgeq %rax, %r14
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rbp, %rcx
; X64-NEXT: movq $-1, %rax
-; X64-NEXT: sbbq %r15, %rax
+; X64-NEXT: sbbq %r14, %rax
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movq %rbp, %xmm0
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,2,3]
; X64-NEXT: movq %xmm0, %rbx
-; X64-NEXT: movq %rbx, %r14
-; X64-NEXT: sarq $63, %r14
-; X64-NEXT: shldq $31, %rbx, %r14
+; X64-NEXT: movq %rbx, %r13
+; X64-NEXT: sarq $63, %r13
+; X64-NEXT: shldq $31, %rbx, %r13
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,2,3]
; X64-NEXT: movq %xmm0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r12
-; X64-NEXT: sarq $63, %r12
-; X64-NEXT: movq %rbx, %r13
-; X64-NEXT: shlq $31, %r13
-; X64-NEXT: movq %r13, %rdi
-; X64-NEXT: movq %r14, %rsi
-; X64-NEXT: movq %r12, %rcx
+; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: sarq $63, %r15
+; X64-NEXT: movq %rbx, %r12
+; X64-NEXT: shlq $31, %r12
+; X64-NEXT: movq %r12, %rdi
+; X64-NEXT: movq %r13, %rsi
+; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __divti3 at PLT
; X64-NEXT: movq %rax, %rbp
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %rbp
-; X64-NEXT: sbbq $0, %r15
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; X64-NEXT: movq %rdx, %rax
-; X64-NEXT: shrq $63, %rax
+; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
-; X64-NEXT: xorl %eax, %ebx
-; X64-NEXT: movq %r13, %rdi
-; X64-NEXT: movq %r14, %rsi
-; X64-NEXT: movq %r12, %rcx
+; X64-NEXT: xorl %r15d, %ebx
+; X64-NEXT: movq %r12, %rdi
+; X64-NEXT: movq %r13, %rsi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT: movq %r15, %rcx
; X64-NEXT: callq __modti3 at PLT
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: orq %rax, %rdx
-; X64-NEXT: setne %cl
-; X64-NEXT: testl %ebx, %ecx
-; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT: setne %al
+; X64-NEXT: testb %bl, %al
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rcx, %rbp
-; X64-NEXT: movq %r15, %rax
+; X64-NEXT: movq %r14, %rax
; X64-NEXT: sbbq $0, %rax
; X64-NEXT: movl $0, %eax
-; X64-NEXT: cmovgeq %rax, %r15
+; X64-NEXT: cmovgeq %rax, %r14
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rbp, %rax
-; X64-NEXT: sbbq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT: sbbq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; X64-NEXT: cmovgeq %rax, %rbp
; X64-NEXT: movq %rbp, %xmm1
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
>From f894a7fe53e1c236d03596bffc6d04dd75ce4604 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Sun, 4 Aug 2024 23:01:09 -0700
Subject: [PATCH 4/4] fixup! update tests after #101868
---
llvm/test/CodeGen/RISCV/div.ll | 32 +++++++++----------
llvm/test/CodeGen/RISCV/rv64zba.ll | 14 ++++----
.../CodeGen/RISCV/srem-seteq-illegal-types.ll | 24 +++++++-------
3 files changed, 34 insertions(+), 36 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll
index f1ad170ac674cc..f4e67698473151 100644
--- a/llvm/test/CodeGen/RISCV/div.ll
+++ b/llvm/test/CodeGen/RISCV/div.ll
@@ -1017,8 +1017,8 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
; RV32I-LABEL: sdiv8_pow2:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a0, 24
-; RV32I-NEXT: srai a1, a1, 31
-; RV32I-NEXT: andi a1, a1, 7
+; RV32I-NEXT: srai a1, a1, 2
+; RV32I-NEXT: srli a1, a1, 29
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: srai a0, a0, 27
@@ -1027,8 +1027,8 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
; RV32IM-LABEL: sdiv8_pow2:
; RV32IM: # %bb.0:
; RV32IM-NEXT: slli a1, a0, 24
-; RV32IM-NEXT: srai a1, a1, 31
-; RV32IM-NEXT: andi a1, a1, 7
+; RV32IM-NEXT: srai a1, a1, 2
+; RV32IM-NEXT: srli a1, a1, 29
; RV32IM-NEXT: add a0, a0, a1
; RV32IM-NEXT: slli a0, a0, 24
; RV32IM-NEXT: srai a0, a0, 27
@@ -1037,8 +1037,8 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
; RV64I-LABEL: sdiv8_pow2:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 56
-; RV64I-NEXT: srai a1, a1, 63
-; RV64I-NEXT: andi a1, a1, 7
+; RV64I-NEXT: srai a1, a1, 2
+; RV64I-NEXT: srli a1, a1, 61
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a0, a0, 59
@@ -1047,8 +1047,8 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
; RV64IM-LABEL: sdiv8_pow2:
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 56
-; RV64IM-NEXT: srai a1, a1, 63
-; RV64IM-NEXT: andi a1, a1, 7
+; RV64IM-NEXT: srai a1, a1, 2
+; RV64IM-NEXT: srli a1, a1, 61
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: slli a0, a0, 56
; RV64IM-NEXT: srai a0, a0, 59
@@ -1205,8 +1205,8 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
; RV32I-LABEL: sdiv16_pow2:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a0, 16
-; RV32I-NEXT: srai a1, a1, 31
-; RV32I-NEXT: andi a1, a1, 7
+; RV32I-NEXT: srai a1, a1, 2
+; RV32I-NEXT: srli a1, a1, 29
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srai a0, a0, 19
@@ -1215,8 +1215,8 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
; RV32IM-LABEL: sdiv16_pow2:
; RV32IM: # %bb.0:
; RV32IM-NEXT: slli a1, a0, 16
-; RV32IM-NEXT: srai a1, a1, 31
-; RV32IM-NEXT: andi a1, a1, 7
+; RV32IM-NEXT: srai a1, a1, 2
+; RV32IM-NEXT: srli a1, a1, 29
; RV32IM-NEXT: add a0, a0, a1
; RV32IM-NEXT: slli a0, a0, 16
; RV32IM-NEXT: srai a0, a0, 19
@@ -1225,8 +1225,8 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
; RV64I-LABEL: sdiv16_pow2:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 48
-; RV64I-NEXT: srai a1, a1, 63
-; RV64I-NEXT: andi a1, a1, 7
+; RV64I-NEXT: srai a1, a1, 2
+; RV64I-NEXT: srli a1, a1, 61
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a0, a0, 51
@@ -1235,8 +1235,8 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
; RV64IM-LABEL: sdiv16_pow2:
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 48
-; RV64IM-NEXT: srai a1, a1, 63
-; RV64IM-NEXT: andi a1, a1, 7
+; RV64IM-NEXT: srai a1, a1, 2
+; RV64IM-NEXT: srli a1, a1, 61
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: slli a0, a0, 48
; RV64IM-NEXT: srai a0, a0, 51
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index f1b2193641d7a2..87796e2c7b72e9 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -1555,16 +1555,15 @@ define zeroext i32 @sext_ashr_zext_i8(i8 %a) nounwind {
; RV64I-LABEL: sext_ashr_zext_i8:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai a0, a0, 63
-; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srai a0, a0, 31
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBANOZBB-LABEL: sext_ashr_zext_i8:
; RV64ZBANOZBB: # %bb.0:
; RV64ZBANOZBB-NEXT: slli a0, a0, 56
-; RV64ZBANOZBB-NEXT: srai a0, a0, 63
-; RV64ZBANOZBB-NEXT: zext.w a0, a0
+; RV64ZBANOZBB-NEXT: srai a0, a0, 31
+; RV64ZBANOZBB-NEXT: srli a0, a0, 32
; RV64ZBANOZBB-NEXT: ret
;
; RV64ZBAZBB-LABEL: sext_ashr_zext_i8:
@@ -1673,16 +1672,15 @@ define zeroext i32 @sext_ashr_zext_i16(i16 %a) nounwind {
; RV64I-LABEL: sext_ashr_zext_i16:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai a0, a0, 57
-; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srai a0, a0, 25
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBANOZBB-LABEL: sext_ashr_zext_i16:
; RV64ZBANOZBB: # %bb.0:
; RV64ZBANOZBB-NEXT: slli a0, a0, 48
-; RV64ZBANOZBB-NEXT: srai a0, a0, 57
-; RV64ZBANOZBB-NEXT: zext.w a0, a0
+; RV64ZBANOZBB-NEXT: srai a0, a0, 25
+; RV64ZBANOZBB-NEXT: srli a0, a0, 32
; RV64ZBANOZBB-NEXT: ret
;
; RV64ZBAZBB-LABEL: sext_ashr_zext_i16:
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 29e98736ad5df5..dc27158cfb31f3 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -222,8 +222,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV32-LABEL: test_srem_pow2_setne:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a0, 26
-; RV32-NEXT: srai a1, a1, 31
-; RV32-NEXT: andi a1, a1, 3
+; RV32-NEXT: srai a1, a1, 1
+; RV32-NEXT: srli a1, a1, 30
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: andi a1, a1, 60
; RV32-NEXT: sub a0, a0, a1
@@ -234,8 +234,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV64-LABEL: test_srem_pow2_setne:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a0, 58
-; RV64-NEXT: srai a1, a1, 63
-; RV64-NEXT: andi a1, a1, 3
+; RV64-NEXT: srai a1, a1, 1
+; RV64-NEXT: srli a1, a1, 62
; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: andi a1, a1, 60
; RV64-NEXT: subw a0, a0, a1
@@ -246,8 +246,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV32M-LABEL: test_srem_pow2_setne:
; RV32M: # %bb.0:
; RV32M-NEXT: slli a1, a0, 26
-; RV32M-NEXT: srai a1, a1, 31
-; RV32M-NEXT: andi a1, a1, 3
+; RV32M-NEXT: srai a1, a1, 1
+; RV32M-NEXT: srli a1, a1, 30
; RV32M-NEXT: add a1, a0, a1
; RV32M-NEXT: andi a1, a1, 60
; RV32M-NEXT: sub a0, a0, a1
@@ -258,8 +258,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV64M-LABEL: test_srem_pow2_setne:
; RV64M: # %bb.0:
; RV64M-NEXT: slli a1, a0, 58
-; RV64M-NEXT: srai a1, a1, 63
-; RV64M-NEXT: andi a1, a1, 3
+; RV64M-NEXT: srai a1, a1, 1
+; RV64M-NEXT: srli a1, a1, 62
; RV64M-NEXT: add a1, a0, a1
; RV64M-NEXT: andi a1, a1, 60
; RV64M-NEXT: subw a0, a0, a1
@@ -270,8 +270,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV32MV-LABEL: test_srem_pow2_setne:
; RV32MV: # %bb.0:
; RV32MV-NEXT: slli a1, a0, 26
-; RV32MV-NEXT: srai a1, a1, 31
-; RV32MV-NEXT: andi a1, a1, 3
+; RV32MV-NEXT: srai a1, a1, 1
+; RV32MV-NEXT: srli a1, a1, 30
; RV32MV-NEXT: add a1, a0, a1
; RV32MV-NEXT: andi a1, a1, 60
; RV32MV-NEXT: sub a0, a0, a1
@@ -282,8 +282,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV64MV-LABEL: test_srem_pow2_setne:
; RV64MV: # %bb.0:
; RV64MV-NEXT: slli a1, a0, 58
-; RV64MV-NEXT: srai a1, a1, 63
-; RV64MV-NEXT: andi a1, a1, 3
+; RV64MV-NEXT: srai a1, a1, 1
+; RV64MV-NEXT: srli a1, a1, 62
; RV64MV-NEXT: add a1, a0, a1
; RV64MV-NEXT: andi a1, a1, 60
; RV64MV-NEXT: subw a0, a0, a1