[llvm] [DAG] SimplifyDemandedBits - ensure we demand the high bits for shl nsw/nuw ops (PR #70041)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 24 06:34:49 PDT 2023
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/70041
Matches InstCombinerImpl::SimplifyDemandedUseBits
Fixes PR69965
>From cd17571bddef2509d786b2e1dab9bad7db0357a1 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Tue, 24 Oct 2023 14:19:56 +0100
Subject: [PATCH 1/2] [DAG] SimplifyDemandedBits - ensure we demand the high
bits for shl nsw/nuw ops
Matches InstCombinerImpl::SimplifyDemandedUseBits
Exposes an issue with the AND(CTPOP(X),1) -> PARITY(X) fold, which fails to correctly demand the known-zero upper bits.
Fixes #69965
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 16 +-
.../CodeGen/AArch64/arm64-shifted-sext.ll | 5 +-
llvm/test/CodeGen/AArch64/load-combine.ll | 8 +-
llvm/test/CodeGen/AMDGPU/shl.ll | 1 +
.../test/CodeGen/AMDGPU/wwm-reserved-spill.ll | 10 +-
llvm/test/CodeGen/PowerPC/pre-inc-disable.ll | 56 ++--
.../test/CodeGen/RISCV/rv64i-complex-float.ll | 1 +
llvm/test/CodeGen/RISCV/rvv/pr61561.ll | 7 +-
llvm/test/CodeGen/RISCV/split-store.ll | 2 +
llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll | 306 ++++++++++--------
llvm/test/CodeGen/X86/fp128-cast.ll | 14 +-
llvm/test/CodeGen/X86/parity.ll | 123 ++++---
llvm/test/CodeGen/X86/pr69965.ll | 25 +-
llvm/test/CodeGen/X86/setcc.ll | 18 +-
.../vector-shuffle-combining-avx512bwvl.ll | 4 +-
15 files changed, 339 insertions(+), 257 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8b4f3159499122a..75bd4f55b64b040 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1785,14 +1785,22 @@ bool TargetLowering::SimplifyDemandedBits(
}
APInt InDemandedMask = DemandedBits.lshr(ShAmt);
+
+ // If the shift is NUW/NSW, then it does demand the high bits.
+ if (Op->getFlags().hasNoSignedWrap())
+ InDemandedMask.setHighBits(ShAmt + 1);
+ else if (Op->getFlags().hasNoUnsignedWrap())
+ InDemandedMask.setHighBits(ShAmt);
+
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- Known.Zero <<= ShAmt;
- Known.One <<= ShAmt;
- // low bits known zero.
- Known.Zero.setLowBits(ShAmt);
+
+ Known = KnownBits::shl(Known,
+ KnownBits::makeConstant(APInt(BitWidth, ShAmt)),
+ /* NUW */ Op->getFlags().hasNoUnsignedWrap(),
+ /* NSW */ Op->getFlags().hasNoSignedWrap());
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
diff --git a/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll b/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
index da6499b7daa82e5..240c96130d38549 100644
--- a/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
@@ -195,8 +195,9 @@ entry:
define i32 @extendedLeftShiftshortTointBy16(i16 signext %a) nounwind readnone ssp {
; CHECK-LABEL: extendedLeftShiftshortTointBy16:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: lsl w8, w0, #16
-; CHECK-NEXT: add w0, w8, #16, lsl #12 ; =65536
+; CHECK-NEXT: add w8, w0, #1
+; CHECK-NEXT: and w8, w8, #0xffff
+; CHECK-NEXT: lsl w0, w8, #16
; CHECK-NEXT: ret
entry:
%inc = add i16 %a, 1
diff --git a/llvm/test/CodeGen/AArch64/load-combine.ll b/llvm/test/CodeGen/AArch64/load-combine.ll
index 57f61e5303ecf97..099b175cff3fb03 100644
--- a/llvm/test/CodeGen/AArch64/load-combine.ll
+++ b/llvm/test/CodeGen/AArch64/load-combine.ll
@@ -578,7 +578,7 @@ define void @short_vector_to_i32_unused_low_i8(ptr %in, ptr %out, ptr %p) {
; CHECK-NEXT: umov w10, v0.h[3]
; CHECK-NEXT: lsl w8, w8, #16
; CHECK-NEXT: bfi w8, w9, #8, #8
-; CHECK-NEXT: orr w8, w8, w10, lsl #24
+; CHECK-NEXT: bfi w8, w10, #24, #8
; CHECK-NEXT: str w8, [x1]
; CHECK-NEXT: ret
%ld = load <4 x i8>, ptr %in, align 4
@@ -609,8 +609,8 @@ define void @short_vector_to_i32_unused_high_i8(ptr %in, ptr %out, ptr %p) {
; CHECK-NEXT: ldrh w9, [x0]
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: umov w8, v0.h[2]
-; CHECK-NEXT: orr w8, w9, w8, lsl #16
-; CHECK-NEXT: str w8, [x1]
+; CHECK-NEXT: bfi w9, w8, #16, #8
+; CHECK-NEXT: str w9, [x1]
; CHECK-NEXT: ret
%ld = load <4 x i8>, ptr %in, align 4
@@ -640,7 +640,7 @@ define void @short_vector_to_i32_unused_low_i16(ptr %in, ptr %out, ptr %p) {
; CHECK-NEXT: umov w8, v0.h[3]
; CHECK-NEXT: umov w9, v0.h[2]
; CHECK-NEXT: lsl w8, w8, #24
-; CHECK-NEXT: orr w8, w8, w9, lsl #16
+; CHECK-NEXT: bfi w8, w9, #16, #8
; CHECK-NEXT: str w8, [x1]
; CHECK-NEXT: ret
%ld = load <4 x i8>, ptr %in, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll
index be0aa394dd99dc0..65fcb9665ce820c 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.ll
@@ -489,6 +489,7 @@ define amdgpu_kernel void @shl_i16_i_s(ptr addrspace(1) %out, i16 zeroext %a) {
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_and_b32 s4, s4, 15
; VI-NEXT: s_lshl_b32 s4, s4, 12
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
index b6e7da97e008905..2afbb2e4c9fb734 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
@@ -53,9 +53,7 @@ define amdgpu_gfx void @strict_wwm_no_cfg(ptr addrspace(8) inreg %tmp14) {
; GFX9-O0-NEXT: s_mov_b64 exec, s[40:41]
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0
; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[40:41], v3, v4
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[40:41]
-; GFX9-O0-NEXT: s_mov_b32 s35, 1
-; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s35, v3
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[40:41]
; GFX9-O0-NEXT: s_mov_b32 s35, 2
; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s35
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[36:39], s34 offset:4
@@ -101,7 +99,6 @@ define amdgpu_gfx void @strict_wwm_no_cfg(ptr addrspace(8) inreg %tmp14) {
; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5
; GFX9-O3-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX9-O3-NEXT: v_lshlrev_b32_e32 v4, 1, v4
-; GFX9-O3-NEXT: v_and_b32_e32 v4, 2, v4
; GFX9-O3-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:4
; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1
; GFX9-O3-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
@@ -235,9 +232,7 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
; GFX9-O0-NEXT: v_readlane_b32 s35, v0, 3
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[36:37], v3, v4
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[36:37]
-; GFX9-O0-NEXT: s_mov_b32 s36, 1
-; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s36, v3
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[36:37]
; GFX9-O0-NEXT: s_mov_b32 s36, 2
; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s36
; GFX9-O0-NEXT: s_mov_b32 s40, s35
@@ -302,7 +297,6 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
; GFX9-O3-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-O3-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX9-O3-NEXT: v_and_b32_e32 v0, 2, v0
; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4
; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1
; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
index 4da36c9af5c101c..426dd1d8e596af1 100644
--- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -19,19 +19,16 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9LE-LABEL: test64:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: add 5, 3, 4
-; P9LE-NEXT: lfdx 0, 3, 4
+; P9LE-NEXT: lxsdx 2, 3, 4
; P9LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
-; P9LE-NEXT: xxlxor 2, 2, 2
+; P9LE-NEXT: xxlxor 1, 1, 1
; P9LE-NEXT: vspltisw 4, 8
; P9LE-NEXT: lxsd 3, 4(5)
; P9LE-NEXT: addi 3, 3, .LCPI0_0@toc@l
; P9LE-NEXT: vadduwm 4, 4, 4
-; P9LE-NEXT: lxv 1, 0(3)
-; P9LE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
-; P9LE-NEXT: addi 3, 3, .LCPI0_1@toc@l
-; P9LE-NEXT: xxperm 2, 0, 1
; P9LE-NEXT: lxv 0, 0(3)
-; P9LE-NEXT: xxperm 3, 3, 0
+; P9LE-NEXT: xxperm 3, 1, 0
+; P9LE-NEXT: xxperm 2, 1, 0
; P9LE-NEXT: vnegw 3, 3
; P9LE-NEXT: vslw 3, 3, 4
; P9LE-NEXT: vsubuwm 2, 3, 2
@@ -50,11 +47,8 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-NEXT: addi 3, 3, .LCPI0_0@toc@l
; P9BE-NEXT: vadduwm 4, 4, 4
; P9BE-NEXT: lxv 0, 0(3)
-; P9BE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
-; P9BE-NEXT: addi 3, 3, .LCPI0_1@toc@l
+; P9BE-NEXT: xxperm 3, 1, 0
; P9BE-NEXT: xxperm 2, 1, 0
-; P9BE-NEXT: lxv 0, 0(3)
-; P9BE-NEXT: xxperm 3, 3, 0
; P9BE-NEXT: vnegw 3, 3
; P9BE-NEXT: vslw 3, 3, 4
; P9BE-NEXT: vsubuwm 2, 3, 2
@@ -71,11 +65,9 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX-NEXT: vspltisw 4, 8
; P9BE-AIX-NEXT: lxsd 3, 4(5)
; P9BE-AIX-NEXT: lxv 0, 0(3)
-; P9BE-AIX-NEXT: ld 3, L..C1(2) # %const.1
; P9BE-AIX-NEXT: vadduwm 4, 4, 4
+; P9BE-AIX-NEXT: xxperm 3, 1, 0
; P9BE-AIX-NEXT: xxperm 2, 1, 0
-; P9BE-AIX-NEXT: lxv 0, 0(3)
-; P9BE-AIX-NEXT: xxperm 3, 3, 0
; P9BE-AIX-NEXT: vnegw 3, 3
; P9BE-AIX-NEXT: vslw 3, 3, 4
; P9BE-AIX-NEXT: vsubuwm 2, 3, 2
@@ -86,25 +78,23 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX32-LABEL: test64:
; P9BE-AIX32: # %bb.0: # %entry
; P9BE-AIX32-NEXT: lwzux 4, 3, 4
-; P9BE-AIX32-NEXT: xxlxor 2, 2, 2
; P9BE-AIX32-NEXT: vspltisw 4, 8
-; P9BE-AIX32-NEXT: stw 4, -48(1)
; P9BE-AIX32-NEXT: vadduwm 4, 4, 4
+; P9BE-AIX32-NEXT: stw 4, -48(1)
; P9BE-AIX32-NEXT: lwz 4, 4(3)
; P9BE-AIX32-NEXT: lxv 0, -48(1)
; P9BE-AIX32-NEXT: stw 4, -32(1)
; P9BE-AIX32-NEXT: lwz 4, L..C0(2) # %const.0
-; P9BE-AIX32-NEXT: lxv 1, -32(1)
; P9BE-AIX32-NEXT: lwz 3, 8(3)
+; P9BE-AIX32-NEXT: lxv 1, -32(1)
; P9BE-AIX32-NEXT: stw 3, -16(1)
-; P9BE-AIX32-NEXT: lwz 3, L..C1(2) # %const.1
+; P9BE-AIX32-NEXT: lxv 2, 0(4)
+; P9BE-AIX32-NEXT: lxv 3, -16(1)
; P9BE-AIX32-NEXT: xxmrghw 2, 0, 1
-; P9BE-AIX32-NEXT: lxv 0, 0(4)
-; P9BE-AIX32-NEXT: xxperm 2, 2, 0
-; P9BE-AIX32-NEXT: lxv 0, -16(1)
-; P9BE-AIX32-NEXT: xxmrghw 3, 1, 0
-; P9BE-AIX32-NEXT: lxv 0, 0(3)
-; P9BE-AIX32-NEXT: xxperm 3, 3, 0
+; P9BE-AIX32-NEXT: xxlxor 0, 0, 0
+; P9BE-AIX32-NEXT: xxperm 2, 0, 2
+; P9BE-AIX32-NEXT: xxmrghw 3, 1, 3
+; P9BE-AIX32-NEXT: xxperm 3, 0, 2
; P9BE-AIX32-NEXT: vnegw 3, 3
; P9BE-AIX32-NEXT: vslw 3, 3, 4
; P9BE-AIX32-NEXT: vsubuwm 2, 3, 2
@@ -180,7 +170,7 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX: # %bb.0: # %entry
; P9BE-AIX-NEXT: add 5, 3, 4
; P9BE-AIX-NEXT: lxsiwzx 2, 3, 4
-; P9BE-AIX-NEXT: ld 3, L..C2(2) # %const.0
+; P9BE-AIX-NEXT: ld 3, L..C1(2) # %const.0
; P9BE-AIX-NEXT: xxlxor 0, 0, 0
; P9BE-AIX-NEXT: vspltisw 4, 8
; P9BE-AIX-NEXT: lxv 1, 0(3)
@@ -200,7 +190,7 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX32: # %bb.0: # %entry
; P9BE-AIX32-NEXT: add 5, 3, 4
; P9BE-AIX32-NEXT: lxsiwzx 2, 3, 4
-; P9BE-AIX32-NEXT: lwz 3, L..C2(2) # %const.0
+; P9BE-AIX32-NEXT: lwz 3, L..C1(2) # %const.0
; P9BE-AIX32-NEXT: xxlxor 0, 0, 0
; P9BE-AIX32-NEXT: vspltisw 4, 8
; P9BE-AIX32-NEXT: lxv 1, 0(3)
@@ -297,9 +287,9 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
; P9BE-AIX-NEXT: li 7, 16
; P9BE-AIX-NEXT: add 6, 3, 4
; P9BE-AIX-NEXT: lxsihzx 1, 3, 4
-; P9BE-AIX-NEXT: ld 3, L..C3(2) # %const.1
+; P9BE-AIX-NEXT: ld 3, L..C2(2) # %const.1
; P9BE-AIX-NEXT: lxsihzx 2, 6, 7
-; P9BE-AIX-NEXT: ld 6, L..C4(2) # %const.0
+; P9BE-AIX-NEXT: ld 6, L..C3(2) # %const.0
; P9BE-AIX-NEXT: lxv 0, 0(6)
; P9BE-AIX-NEXT: li 6, 0
; P9BE-AIX-NEXT: mtvsrwz 3, 6
@@ -328,7 +318,7 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
; P9BE-AIX32-NEXT: sth 4, -48(1)
; P9BE-AIX32-NEXT: lxv 4, -48(1)
; P9BE-AIX32-NEXT: sth 3, -32(1)
-; P9BE-AIX32-NEXT: lwz 3, L..C3(2) # %const.0
+; P9BE-AIX32-NEXT: lwz 3, L..C2(2) # %const.0
; P9BE-AIX32-NEXT: lxv 3, -32(1)
; P9BE-AIX32-NEXT: vmrghh 4, 2, 4
; P9BE-AIX32-NEXT: lxv 0, 0(3)
@@ -437,9 +427,9 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
; P9BE-AIX-NEXT: add 6, 3, 4
; P9BE-AIX-NEXT: li 7, 8
; P9BE-AIX-NEXT: lxsibzx 3, 3, 4
-; P9BE-AIX-NEXT: ld 3, L..C5(2) # %const.1
+; P9BE-AIX-NEXT: ld 3, L..C4(2) # %const.1
; P9BE-AIX-NEXT: lxsibzx 0, 6, 7
-; P9BE-AIX-NEXT: ld 6, L..C6(2) # %const.0
+; P9BE-AIX-NEXT: ld 6, L..C5(2) # %const.0
; P9BE-AIX-NEXT: lxv 1, 0(6)
; P9BE-AIX-NEXT: li 6, 0
; P9BE-AIX-NEXT: mtvsrwz 2, 6
@@ -464,9 +454,9 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
; P9BE-AIX32-NEXT: add 6, 3, 4
; P9BE-AIX32-NEXT: li 7, 8
; P9BE-AIX32-NEXT: lxsibzx 3, 3, 4
-; P9BE-AIX32-NEXT: lwz 3, L..C4(2) # %const.1
+; P9BE-AIX32-NEXT: lwz 3, L..C3(2) # %const.1
; P9BE-AIX32-NEXT: lxsibzx 0, 6, 7
-; P9BE-AIX32-NEXT: lwz 6, L..C5(2) # %const.0
+; P9BE-AIX32-NEXT: lwz 6, L..C4(2) # %const.0
; P9BE-AIX32-NEXT: lxv 1, 0(6)
; P9BE-AIX32-NEXT: li 6, 0
; P9BE-AIX32-NEXT: mtvsrwz 2, 6
diff --git a/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll b/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll
index 690828c7794346e..6e4f624415d9983 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll
@@ -20,6 +20,7 @@ define i64 @complex_float_add(i64 %a.coerce, i64 %b.coerce) nounwind {
; CHECK-NEXT: mv a0, s0
; CHECK-NEXT: mv a1, s1
; CHECK-NEXT: call __addsf3@plt
+; CHECK-NEXT: andi a0, a0, -1
; CHECK-NEXT: slli a0, a0, 32
; CHECK-NEXT: slli s2, s2, 32
; CHECK-NEXT: srli a1, s2, 32
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr61561.ll b/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
index f27edd36116657e..8d246d99388193f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
@@ -5,12 +5,11 @@ define <vscale x 4 x i8> @foo(ptr %p) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: vl1re16.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vsll.vi v8, v8, 3
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
+; CHECK-NEXT: vsll.vi v8, v10, 3
; CHECK-NEXT: li a0, 248
-; CHECK-NEXT: vand.vx v8, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: lui a0, 4
; CHECK-NEXT: vmv.v.x v10, a0
; CHECK-NEXT: lui a0, 1
diff --git a/llvm/test/CodeGen/RISCV/split-store.ll b/llvm/test/CodeGen/RISCV/split-store.ll
index 367d3fe2c595fea..afc72d2b8ab7016 100644
--- a/llvm/test/CodeGen/RISCV/split-store.ll
+++ b/llvm/test/CodeGen/RISCV/split-store.ll
@@ -129,6 +129,7 @@ define void @int32_int32_pair(i32 %tmp1, i32 %tmp2, ptr %ref.tmp) {
;
; RV64-LABEL: int32_int32_pair:
; RV64: # %bb.0:
+; RV64-NEXT: andi a1, a1, -1
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: srli a0, a0, 32
@@ -138,6 +139,7 @@ define void @int32_int32_pair(i32 %tmp1, i32 %tmp2, ptr %ref.tmp) {
;
; RV64D-LABEL: int32_int32_pair:
; RV64D: # %bb.0:
+; RV64D-NEXT: andi a1, a1, -1
; RV64D-NEXT: slli a1, a1, 32
; RV64D-NEXT: slli a0, a0, 32
; RV64D-NEXT: srli a0, a0, 32
diff --git a/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll b/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
index fdfbf3393098e4a..40e1398b6f10fe1 100644
--- a/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
+++ b/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
@@ -20,18 +20,19 @@ define void @arm_q15_to_q31(ptr nocapture noundef readonly %pSrc, ptr nocapture
; CHECK-NEXT: @ %bb.2: @ %while.body.prol
; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: ldrh r2, [r0]
-; CHECK-NEXT: ldrh r7, [r0, #2]
-; CHECK-NEXT: ldrh r4, [r0, #4]
-; CHECK-NEXT: ldrh r6, [r0, #6]
+; CHECK-NEXT: movs r6, #2
+; CHECK-NEXT: ldrsh r6, [r0, r6]
+; CHECK-NEXT: movs r7, #6
+; CHECK-NEXT: ldrsh r7, [r0, r7]
+; CHECK-NEXT: lsls r2, r7, #16
+; CHECK-NEXT: ldrh r4, [r0]
+; CHECK-NEXT: ldrh r7, [r0, #4]
+; CHECK-NEXT: lsls r7, r7, #16
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: lsls r4, r4, #16
-; CHECK-NEXT: lsls r7, r7, #16
-; CHECK-NEXT: lsls r2, r2, #16
-; CHECK-NEXT: stm r1!, {r2, r7}
-; CHECK-NEXT: str r4, [r1]
-; CHECK-NEXT: str r6, [r1, #4]
-; CHECK-NEXT: subs r1, #8
+; CHECK-NEXT: stm r1!, {r4, r6, r7}
+; CHECK-NEXT: str r2, [r1]
+; CHECK-NEXT: subs r1, #12
; CHECK-NEXT: cmp r5, #1
; CHECK-NEXT: bne .LBB0_11
; CHECK-NEXT: @ %bb.3:
@@ -45,53 +46,61 @@ define void @arm_q15_to_q31(ptr nocapture noundef readonly %pSrc, ptr nocapture
; CHECK-NEXT: blo .LBB0_6
; CHECK-NEXT: .LBB0_5: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldrh r2, [r0]
-; CHECK-NEXT: ldrh r4, [r0, #2]
-; CHECK-NEXT: ldrh r5, [r0, #4]
-; CHECK-NEXT: ldrh r6, [r0, #6]
-; CHECK-NEXT: lsls r6, r6, #16
-; CHECK-NEXT: str r6, [r1, #12]
-; CHECK-NEXT: lsls r5, r5, #16
-; CHECK-NEXT: str r5, [r1, #8]
+; CHECK-NEXT: movs r2, #2
+; CHECK-NEXT: ldrsh r2, [r0, r2]
+; CHECK-NEXT: movs r4, #6
+; CHECK-NEXT: ldrsh r4, [r0, r4]
; CHECK-NEXT: lsls r4, r4, #16
-; CHECK-NEXT: str r4, [r1, #4]
+; CHECK-NEXT: ldrh r5, [r0]
+; CHECK-NEXT: ldrh r6, [r0, #4]
+; CHECK-NEXT: str r4, [r1, #12]
+; CHECK-NEXT: lsls r4, r6, #16
+; CHECK-NEXT: str r4, [r1, #8]
; CHECK-NEXT: lsls r2, r2, #16
+; CHECK-NEXT: str r2, [r1, #4]
+; CHECK-NEXT: lsls r2, r5, #16
; CHECK-NEXT: str r2, [r1]
-; CHECK-NEXT: ldrh r2, [r0, #8]
-; CHECK-NEXT: ldrh r4, [r0, #10]
-; CHECK-NEXT: ldrh r5, [r0, #12]
-; CHECK-NEXT: ldrh r6, [r0, #14]
-; CHECK-NEXT: lsls r6, r6, #16
-; CHECK-NEXT: str r6, [r1, #28]
-; CHECK-NEXT: lsls r5, r5, #16
-; CHECK-NEXT: str r5, [r1, #24]
+; CHECK-NEXT: movs r2, #10
+; CHECK-NEXT: ldrsh r2, [r0, r2]
+; CHECK-NEXT: movs r4, #14
+; CHECK-NEXT: ldrsh r4, [r0, r4]
; CHECK-NEXT: lsls r4, r4, #16
-; CHECK-NEXT: str r4, [r1, #20]
+; CHECK-NEXT: ldrh r5, [r0, #8]
+; CHECK-NEXT: ldrh r6, [r0, #12]
+; CHECK-NEXT: str r4, [r1, #28]
+; CHECK-NEXT: lsls r4, r6, #16
+; CHECK-NEXT: str r4, [r1, #24]
; CHECK-NEXT: lsls r2, r2, #16
+; CHECK-NEXT: str r2, [r1, #20]
+; CHECK-NEXT: lsls r2, r5, #16
; CHECK-NEXT: str r2, [r1, #16]
-; CHECK-NEXT: ldrh r2, [r0, #16]
-; CHECK-NEXT: ldrh r4, [r0, #18]
-; CHECK-NEXT: ldrh r5, [r0, #20]
-; CHECK-NEXT: ldrh r6, [r0, #22]
-; CHECK-NEXT: lsls r6, r6, #16
-; CHECK-NEXT: str r6, [r1, #44]
-; CHECK-NEXT: lsls r5, r5, #16
-; CHECK-NEXT: str r5, [r1, #40]
+; CHECK-NEXT: movs r2, #18
+; CHECK-NEXT: ldrsh r2, [r0, r2]
+; CHECK-NEXT: movs r4, #22
+; CHECK-NEXT: ldrsh r4, [r0, r4]
; CHECK-NEXT: lsls r4, r4, #16
-; CHECK-NEXT: str r4, [r1, #36]
+; CHECK-NEXT: ldrh r5, [r0, #16]
+; CHECK-NEXT: ldrh r6, [r0, #20]
+; CHECK-NEXT: str r4, [r1, #44]
+; CHECK-NEXT: lsls r4, r6, #16
+; CHECK-NEXT: str r4, [r1, #40]
; CHECK-NEXT: lsls r2, r2, #16
+; CHECK-NEXT: str r2, [r1, #36]
+; CHECK-NEXT: lsls r2, r5, #16
; CHECK-NEXT: str r2, [r1, #32]
-; CHECK-NEXT: ldrh r2, [r0, #24]
-; CHECK-NEXT: ldrh r4, [r0, #26]
-; CHECK-NEXT: ldrh r5, [r0, #28]
-; CHECK-NEXT: ldrh r6, [r0, #30]
-; CHECK-NEXT: lsls r6, r6, #16
-; CHECK-NEXT: str r6, [r1, #60]
-; CHECK-NEXT: lsls r5, r5, #16
-; CHECK-NEXT: str r5, [r1, #56]
+; CHECK-NEXT: movs r2, #26
+; CHECK-NEXT: ldrsh r2, [r0, r2]
+; CHECK-NEXT: movs r4, #30
+; CHECK-NEXT: ldrsh r4, [r0, r4]
; CHECK-NEXT: lsls r4, r4, #16
-; CHECK-NEXT: str r4, [r1, #52]
+; CHECK-NEXT: ldrh r5, [r0, #24]
+; CHECK-NEXT: ldrh r6, [r0, #28]
+; CHECK-NEXT: str r4, [r1, #60]
+; CHECK-NEXT: lsls r4, r6, #16
+; CHECK-NEXT: str r4, [r1, #56]
; CHECK-NEXT: lsls r2, r2, #16
+; CHECK-NEXT: str r2, [r1, #52]
+; CHECK-NEXT: lsls r2, r5, #16
; CHECK-NEXT: str r2, [r1, #48]
; CHECK-NEXT: adds r1, #64
; CHECK-NEXT: adds r0, #32
@@ -102,37 +111,42 @@ define void @arm_q15_to_q31(ptr nocapture noundef readonly %pSrc, ptr nocapture
; CHECK-NEXT: ands r7, r2
; CHECK-NEXT: beq .LBB0_10
; CHECK-NEXT: @ %bb.7: @ %while.body12
-; CHECK-NEXT: ldrh r2, [r0]
+; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: ldrsh r2, [r0, r2]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1]
; CHECK-NEXT: cmp r7, #1
; CHECK-NEXT: beq .LBB0_10
; CHECK-NEXT: @ %bb.8: @ %while.body12.1
-; CHECK-NEXT: ldrh r2, [r0, #2]
+; CHECK-NEXT: movs r2, #2
+; CHECK-NEXT: ldrsh r2, [r0, r2]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1, #4]
; CHECK-NEXT: cmp r7, #2
; CHECK-NEXT: beq .LBB0_10
; CHECK-NEXT: @ %bb.9: @ %while.body12.2
-; CHECK-NEXT: ldrh r0, [r0, #4]
+; CHECK-NEXT: movs r2, #4
+; CHECK-NEXT: ldrsh r0, [r0, r2]
; CHECK-NEXT: lsls r0, r0, #16
; CHECK-NEXT: str r0, [r1, #8]
; CHECK-NEXT: .LBB0_10: @ %while.end17
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
; CHECK-NEXT: .LBB0_11: @ %while.body.prol.1
-; CHECK-NEXT: ldrh r2, [r0, #8]
-; CHECK-NEXT: ldrh r4, [r0, #10]
-; CHECK-NEXT: ldrh r6, [r0, #12]
-; CHECK-NEXT: ldrh r7, [r0, #14]
-; CHECK-NEXT: lsls r7, r7, #16
-; CHECK-NEXT: lsls r6, r6, #16
+; CHECK-NEXT: movs r2, #10
+; CHECK-NEXT: ldrsh r2, [r0, r2]
+; CHECK-NEXT: movs r4, #14
+; CHECK-NEXT: ldrsh r4, [r0, r4]
; CHECK-NEXT: lsls r4, r4, #16
+; CHECK-NEXT: ldrh r6, [r0, #8]
+; CHECK-NEXT: ldrh r7, [r0, #12]
+; CHECK-NEXT: lsls r7, r7, #16
; CHECK-NEXT: lsls r2, r2, #16
-; CHECK-NEXT: str r2, [r1, #16]
-; CHECK-NEXT: str r4, [r1, #20]
-; CHECK-NEXT: str r6, [r1, #24]
-; CHECK-NEXT: str r7, [r1, #28]
+; CHECK-NEXT: lsls r6, r6, #16
+; CHECK-NEXT: str r6, [r1, #16]
+; CHECK-NEXT: str r2, [r1, #20]
+; CHECK-NEXT: str r7, [r1, #24]
+; CHECK-NEXT: str r4, [r1, #28]
; CHECK-NEXT: cmp r5, #2
; CHECK-NEXT: bne .LBB0_13
; CHECK-NEXT: @ %bb.12:
@@ -141,17 +155,20 @@ define void @arm_q15_to_q31(ptr nocapture noundef readonly %pSrc, ptr nocapture
; CHECK-NEXT: adds r0, #16
; CHECK-NEXT: b .LBB0_14
; CHECK-NEXT: .LBB0_13: @ %while.body.prol.2
-; CHECK-NEXT: ldrh r2, [r0, #16]
-; CHECK-NEXT: ldrh r4, [r0, #18]
-; CHECK-NEXT: ldrh r5, [r0, #20]
-; CHECK-NEXT: ldrh r6, [r0, #22]
-; CHECK-NEXT: lsls r6, r6, #16
-; CHECK-NEXT: lsls r5, r5, #16
+; CHECK-NEXT: movs r2, #18
+; CHECK-NEXT: ldrsh r2, [r0, r2]
+; CHECK-NEXT: movs r4, #22
+; CHECK-NEXT: ldrsh r4, [r0, r4]
; CHECK-NEXT: lsls r4, r4, #16
+; CHECK-NEXT: ldrh r5, [r0, #16]
+; CHECK-NEXT: ldrh r6, [r0, #20]
+; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: lsls r2, r2, #16
-; CHECK-NEXT: mov r7, r1
-; CHECK-NEXT: adds r7, #32
-; CHECK-NEXT: stm r7!, {r2, r4, r5, r6}
+; CHECK-NEXT: lsls r5, r5, #16
+; CHECK-NEXT: str r5, [r1, #32]
+; CHECK-NEXT: str r2, [r1, #36]
+; CHECK-NEXT: str r6, [r1, #40]
+; CHECK-NEXT: str r4, [r1, #44]
; CHECK-NEXT: subs r3, r3, #3
; CHECK-NEXT: adds r1, #48
; CHECK-NEXT: adds r0, #24
@@ -427,18 +444,19 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
; CHECK-NEXT: @ %bb.2: @ %while.body.prol
; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: ldrh r2, [r0]
-; CHECK-NEXT: ldrh r7, [r0, #2]
-; CHECK-NEXT: ldrh r4, [r0, #4]
-; CHECK-NEXT: ldrh r6, [r0, #6]
+; CHECK-NEXT: movs r6, #2
+; CHECK-NEXT: ldrsh r6, [r0, r6]
+; CHECK-NEXT: movs r7, #6
+; CHECK-NEXT: ldrsh r7, [r0, r7]
+; CHECK-NEXT: lsls r2, r7, #16
+; CHECK-NEXT: ldrh r4, [r0]
+; CHECK-NEXT: ldrh r7, [r0, #4]
+; CHECK-NEXT: lsls r7, r7, #16
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: lsls r4, r4, #16
-; CHECK-NEXT: lsls r7, r7, #16
-; CHECK-NEXT: lsls r2, r2, #16
-; CHECK-NEXT: stm r1!, {r2, r7}
-; CHECK-NEXT: str r4, [r1]
-; CHECK-NEXT: str r6, [r1, #4]
-; CHECK-NEXT: subs r1, #8
+; CHECK-NEXT: stm r1!, {r4, r6, r7}
+; CHECK-NEXT: str r2, [r1]
+; CHECK-NEXT: subs r1, #12
; CHECK-NEXT: cmp r5, #1
; CHECK-NEXT: bne .LBB1_11
; CHECK-NEXT: @ %bb.3:
@@ -452,53 +470,61 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
; CHECK-NEXT: blo .LBB1_6
; CHECK-NEXT: .LBB1_5: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldrh r2, [r0]
-; CHECK-NEXT: ldrh r4, [r0, #2]
-; CHECK-NEXT: ldrh r5, [r0, #4]
-; CHECK-NEXT: ldrh r6, [r0, #6]
-; CHECK-NEXT: lsls r6, r6, #16
-; CHECK-NEXT: str r6, [r1, #12]
-; CHECK-NEXT: lsls r5, r5, #16
-; CHECK-NEXT: str r5, [r1, #8]
+; CHECK-NEXT: movs r2, #2
+; CHECK-NEXT: ldrsh r2, [r0, r2]
+; CHECK-NEXT: movs r4, #6
+; CHECK-NEXT: ldrsh r4, [r0, r4]
; CHECK-NEXT: lsls r4, r4, #16
-; CHECK-NEXT: str r4, [r1, #4]
+; CHECK-NEXT: ldrh r5, [r0]
+; CHECK-NEXT: ldrh r6, [r0, #4]
+; CHECK-NEXT: str r4, [r1, #12]
+; CHECK-NEXT: lsls r4, r6, #16
+; CHECK-NEXT: str r4, [r1, #8]
; CHECK-NEXT: lsls r2, r2, #16
+; CHECK-NEXT: str r2, [r1, #4]
+; CHECK-NEXT: lsls r2, r5, #16
; CHECK-NEXT: str r2, [r1]
-; CHECK-NEXT: ldrh r2, [r0, #8]
-; CHECK-NEXT: ldrh r4, [r0, #10]
-; CHECK-NEXT: ldrh r5, [r0, #12]
-; CHECK-NEXT: ldrh r6, [r0, #14]
-; CHECK-NEXT: lsls r6, r6, #16
-; CHECK-NEXT: str r6, [r1, #28]
-; CHECK-NEXT: lsls r5, r5, #16
-; CHECK-NEXT: str r5, [r1, #24]
+; CHECK-NEXT: movs r2, #10
+; CHECK-NEXT: ldrsh r2, [r0, r2]
+; CHECK-NEXT: movs r4, #14
+; CHECK-NEXT: ldrsh r4, [r0, r4]
; CHECK-NEXT: lsls r4, r4, #16
-; CHECK-NEXT: str r4, [r1, #20]
+; CHECK-NEXT: ldrh r5, [r0, #8]
+; CHECK-NEXT: ldrh r6, [r0, #12]
+; CHECK-NEXT: str r4, [r1, #28]
+; CHECK-NEXT: lsls r4, r6, #16
+; CHECK-NEXT: str r4, [r1, #24]
; CHECK-NEXT: lsls r2, r2, #16
+; CHECK-NEXT: str r2, [r1, #20]
+; CHECK-NEXT: lsls r2, r5, #16
; CHECK-NEXT: str r2, [r1, #16]
-; CHECK-NEXT: ldrh r2, [r0, #16]
-; CHECK-NEXT: ldrh r4, [r0, #18]
-; CHECK-NEXT: ldrh r5, [r0, #20]
-; CHECK-NEXT: ldrh r6, [r0, #22]
-; CHECK-NEXT: lsls r6, r6, #16
-; CHECK-NEXT: str r6, [r1, #44]
-; CHECK-NEXT: lsls r5, r5, #16
-; CHECK-NEXT: str r5, [r1, #40]
+; CHECK-NEXT: movs r2, #18
+; CHECK-NEXT: ldrsh r2, [r0, r2]
+; CHECK-NEXT: movs r4, #22
+; CHECK-NEXT: ldrsh r4, [r0, r4]
; CHECK-NEXT: lsls r4, r4, #16
-; CHECK-NEXT: str r4, [r1, #36]
+; CHECK-NEXT: ldrh r5, [r0, #16]
+; CHECK-NEXT: ldrh r6, [r0, #20]
+; CHECK-NEXT: str r4, [r1, #44]
+; CHECK-NEXT: lsls r4, r6, #16
+; CHECK-NEXT: str r4, [r1, #40]
; CHECK-NEXT: lsls r2, r2, #16
+; CHECK-NEXT: str r2, [r1, #36]
+; CHECK-NEXT: lsls r2, r5, #16
; CHECK-NEXT: str r2, [r1, #32]
-; CHECK-NEXT: ldrh r2, [r0, #24]
-; CHECK-NEXT: ldrh r4, [r0, #26]
-; CHECK-NEXT: ldrh r5, [r0, #28]
-; CHECK-NEXT: ldrh r6, [r0, #30]
-; CHECK-NEXT: lsls r6, r6, #16
-; CHECK-NEXT: str r6, [r1, #60]
-; CHECK-NEXT: lsls r5, r5, #16
-; CHECK-NEXT: str r5, [r1, #56]
+; CHECK-NEXT: movs r2, #26
+; CHECK-NEXT: ldrsh r2, [r0, r2]
+; CHECK-NEXT: movs r4, #30
+; CHECK-NEXT: ldrsh r4, [r0, r4]
; CHECK-NEXT: lsls r4, r4, #16
-; CHECK-NEXT: str r4, [r1, #52]
+; CHECK-NEXT: ldrh r5, [r0, #24]
+; CHECK-NEXT: ldrh r6, [r0, #28]
+; CHECK-NEXT: str r4, [r1, #60]
+; CHECK-NEXT: lsls r4, r6, #16
+; CHECK-NEXT: str r4, [r1, #56]
; CHECK-NEXT: lsls r2, r2, #16
+; CHECK-NEXT: str r2, [r1, #52]
+; CHECK-NEXT: lsls r2, r5, #16
; CHECK-NEXT: str r2, [r1, #48]
; CHECK-NEXT: adds r1, #64
; CHECK-NEXT: subs r3, r3, #4
@@ -510,37 +536,42 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
; CHECK-NEXT: ands r7, r2
; CHECK-NEXT: beq .LBB1_10
; CHECK-NEXT: @ %bb.7: @ %while.body12
-; CHECK-NEXT: ldrh r2, [r0]
+; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: ldrsh r2, [r0, r2]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1]
; CHECK-NEXT: cmp r7, #1
; CHECK-NEXT: beq .LBB1_10
; CHECK-NEXT: @ %bb.8: @ %while.body12.1
-; CHECK-NEXT: ldrh r2, [r0, #2]
+; CHECK-NEXT: movs r2, #2
+; CHECK-NEXT: ldrsh r2, [r0, r2]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1, #4]
; CHECK-NEXT: cmp r7, #2
; CHECK-NEXT: beq .LBB1_10
; CHECK-NEXT: @ %bb.9: @ %while.body12.2
-; CHECK-NEXT: ldrh r0, [r0, #4]
+; CHECK-NEXT: movs r2, #4
+; CHECK-NEXT: ldrsh r0, [r0, r2]
; CHECK-NEXT: lsls r0, r0, #16
; CHECK-NEXT: str r0, [r1, #8]
; CHECK-NEXT: .LBB1_10: @ %while.end17
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
; CHECK-NEXT: .LBB1_11: @ %while.body.prol.1
-; CHECK-NEXT: ldrh r2, [r0, #8]
-; CHECK-NEXT: ldrh r4, [r0, #10]
-; CHECK-NEXT: ldrh r6, [r0, #12]
-; CHECK-NEXT: ldrh r7, [r0, #14]
-; CHECK-NEXT: lsls r7, r7, #16
-; CHECK-NEXT: lsls r6, r6, #16
+; CHECK-NEXT: movs r2, #10
+; CHECK-NEXT: ldrsh r2, [r0, r2]
+; CHECK-NEXT: movs r4, #14
+; CHECK-NEXT: ldrsh r4, [r0, r4]
; CHECK-NEXT: lsls r4, r4, #16
+; CHECK-NEXT: ldrh r6, [r0, #8]
+; CHECK-NEXT: ldrh r7, [r0, #12]
+; CHECK-NEXT: lsls r7, r7, #16
; CHECK-NEXT: lsls r2, r2, #16
-; CHECK-NEXT: str r2, [r1, #16]
-; CHECK-NEXT: str r4, [r1, #20]
-; CHECK-NEXT: str r6, [r1, #24]
-; CHECK-NEXT: str r7, [r1, #28]
+; CHECK-NEXT: lsls r6, r6, #16
+; CHECK-NEXT: str r6, [r1, #16]
+; CHECK-NEXT: str r2, [r1, #20]
+; CHECK-NEXT: str r7, [r1, #24]
+; CHECK-NEXT: str r4, [r1, #28]
; CHECK-NEXT: cmp r5, #2
; CHECK-NEXT: bne .LBB1_13
; CHECK-NEXT: @ %bb.12:
@@ -549,17 +580,20 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
; CHECK-NEXT: adds r0, #16
; CHECK-NEXT: b .LBB1_14
; CHECK-NEXT: .LBB1_13: @ %while.body.prol.2
-; CHECK-NEXT: ldrh r2, [r0, #16]
-; CHECK-NEXT: ldrh r4, [r0, #18]
-; CHECK-NEXT: ldrh r5, [r0, #20]
-; CHECK-NEXT: ldrh r6, [r0, #22]
-; CHECK-NEXT: lsls r6, r6, #16
-; CHECK-NEXT: lsls r5, r5, #16
+; CHECK-NEXT: movs r2, #18
+; CHECK-NEXT: ldrsh r2, [r0, r2]
+; CHECK-NEXT: movs r4, #22
+; CHECK-NEXT: ldrsh r4, [r0, r4]
; CHECK-NEXT: lsls r4, r4, #16
+; CHECK-NEXT: ldrh r5, [r0, #16]
+; CHECK-NEXT: ldrh r6, [r0, #20]
+; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: lsls r2, r2, #16
-; CHECK-NEXT: mov r7, r1
-; CHECK-NEXT: adds r7, #32
-; CHECK-NEXT: stm r7!, {r2, r4, r5, r6}
+; CHECK-NEXT: lsls r5, r5, #16
+; CHECK-NEXT: str r5, [r1, #32]
+; CHECK-NEXT: str r2, [r1, #36]
+; CHECK-NEXT: str r6, [r1, #40]
+; CHECK-NEXT: str r4, [r1, #44]
; CHECK-NEXT: subs r3, r3, #3
; CHECK-NEXT: adds r1, #48
; CHECK-NEXT: adds r0, #24
diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll
index b39533c0ee21c29..bacf8d293d12bc8 100644
--- a/llvm/test/CodeGen/X86/fp128-cast.ll
+++ b/llvm/test/CodeGen/X86/fp128-cast.ll
@@ -1221,14 +1221,14 @@ define fp128 @TestPair128(i64 %a, i64 %b) nounwind {
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: addl $3, %ecx
-; X32-NEXT: adcl $0, %edx
-; X32-NEXT: adcl $0, %esi
+; X32-NEXT: addl $3, %esi
; X32-NEXT: adcl $0, %edi
-; X32-NEXT: movl %esi, 8(%eax)
-; X32-NEXT: movl %edx, 4(%eax)
-; X32-NEXT: movl %ecx, (%eax)
-; X32-NEXT: movl %edi, 12(%eax)
+; X32-NEXT: adcl $0, %ecx
+; X32-NEXT: adcl $0, %edx
+; X32-NEXT: movl %ecx, 8(%eax)
+; X32-NEXT: movl %edi, 4(%eax)
+; X32-NEXT: movl %esi, (%eax)
+; X32-NEXT: movl %edx, 12(%eax)
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: retl $4
diff --git a/llvm/test/CodeGen/X86/parity.ll b/llvm/test/CodeGen/X86/parity.ll
index cada55919c8cea2..1d546b14154da94 100644
--- a/llvm/test/CodeGen/X86/parity.ll
+++ b/llvm/test/CodeGen/X86/parity.ll
@@ -404,21 +404,46 @@ define i16 @parity_16_mask15(i16 %x) {
define i16 @parity_16_shift(i16 %0) {
; X86-NOPOPCNT-LABEL: parity_16_shift:
; X86-NOPOPCNT: # %bb.0:
-; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOPOPCNT-NEXT: xorl %eax, %eax
-; X86-NOPOPCNT-NEXT: xorb %ch, %cl
-; X86-NOPOPCNT-NEXT: setnp %al
+; X86-NOPOPCNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NOPOPCNT-NEXT: movl %eax, %ecx
+; X86-NOPOPCNT-NEXT: shrl %ecx
+; X86-NOPOPCNT-NEXT: andl $21845, %ecx # imm = 0x5555
+; X86-NOPOPCNT-NEXT: subl %ecx, %eax
+; X86-NOPOPCNT-NEXT: movl %eax, %ecx
+; X86-NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333
+; X86-NOPOPCNT-NEXT: shrl $2, %eax
+; X86-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333
+; X86-NOPOPCNT-NEXT: addl %ecx, %eax
+; X86-NOPOPCNT-NEXT: movl %eax, %ecx
+; X86-NOPOPCNT-NEXT: shrl $4, %ecx
+; X86-NOPOPCNT-NEXT: addl %eax, %ecx
+; X86-NOPOPCNT-NEXT: movl %ecx, %eax
+; X86-NOPOPCNT-NEXT: shrl $8, %eax
+; X86-NOPOPCNT-NEXT: addl %ecx, %eax
; X86-NOPOPCNT-NEXT: addl %eax, %eax
+; X86-NOPOPCNT-NEXT: andl $2, %eax
; X86-NOPOPCNT-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOPOPCNT-NEXT: retl
;
; X64-NOPOPCNT-LABEL: parity_16_shift:
; X64-NOPOPCNT: # %bb.0:
-; X64-NOPOPCNT-NEXT: movl %edi, %ecx
-; X64-NOPOPCNT-NEXT: xorl %eax, %eax
-; X64-NOPOPCNT-NEXT: xorb %ch, %cl
-; X64-NOPOPCNT-NEXT: setnp %al
+; X64-NOPOPCNT-NEXT: movl %edi, %eax
+; X64-NOPOPCNT-NEXT: shrl %eax
+; X64-NOPOPCNT-NEXT: andl $21845, %eax # imm = 0x5555
+; X64-NOPOPCNT-NEXT: subl %eax, %edi
+; X64-NOPOPCNT-NEXT: movl %edi, %eax
+; X64-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333
+; X64-NOPOPCNT-NEXT: shrl $2, %edi
+; X64-NOPOPCNT-NEXT: andl $13107, %edi # imm = 0x3333
+; X64-NOPOPCNT-NEXT: addl %edi, %eax
+; X64-NOPOPCNT-NEXT: movl %eax, %ecx
+; X64-NOPOPCNT-NEXT: shrl $4, %ecx
+; X64-NOPOPCNT-NEXT: addl %eax, %ecx
+; X64-NOPOPCNT-NEXT: movl %ecx, %eax
+; X64-NOPOPCNT-NEXT: shrl $8, %eax
+; X64-NOPOPCNT-NEXT: addl %ecx, %eax
; X64-NOPOPCNT-NEXT: addl %eax, %eax
+; X64-NOPOPCNT-NEXT: andl $2, %eax
; X64-NOPOPCNT-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOPOPCNT-NEXT: retq
;
@@ -426,8 +451,8 @@ define i16 @parity_16_shift(i16 %0) {
; X86-POPCNT: # %bb.0:
; X86-POPCNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT: popcntl %eax, %eax
-; X86-POPCNT-NEXT: andl $1, %eax
; X86-POPCNT-NEXT: addl %eax, %eax
+; X86-POPCNT-NEXT: andl $2, %eax
; X86-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax
; X86-POPCNT-NEXT: retl
;
@@ -435,8 +460,8 @@ define i16 @parity_16_shift(i16 %0) {
; X64-POPCNT: # %bb.0:
; X64-POPCNT-NEXT: movzwl %di, %eax
; X64-POPCNT-NEXT: popcntl %eax, %eax
-; X64-POPCNT-NEXT: andl $1, %eax
; X64-POPCNT-NEXT: addl %eax, %eax
+; X64-POPCNT-NEXT: andl $2, %eax
; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax
; X64-POPCNT-NEXT: retq
%2 = tail call i16 @llvm.ctpop.i16(i16 %0)
@@ -510,37 +535,55 @@ define i32 @parity_32_shift(i32 %0) {
; X86-NOPOPCNT: # %bb.0:
; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOPOPCNT-NEXT: movl %eax, %ecx
-; X86-NOPOPCNT-NEXT: shrl $16, %ecx
-; X86-NOPOPCNT-NEXT: xorl %eax, %ecx
-; X86-NOPOPCNT-NEXT: xorl %eax, %eax
-; X86-NOPOPCNT-NEXT: xorb %ch, %cl
-; X86-NOPOPCNT-NEXT: setnp %al
-; X86-NOPOPCNT-NEXT: addl %eax, %eax
+; X86-NOPOPCNT-NEXT: shrl %ecx
+; X86-NOPOPCNT-NEXT: andl $1431655765, %ecx # imm = 0x55555555
+; X86-NOPOPCNT-NEXT: subl %ecx, %eax
+; X86-NOPOPCNT-NEXT: movl %eax, %ecx
+; X86-NOPOPCNT-NEXT: andl $858993459, %ecx # imm = 0x33333333
+; X86-NOPOPCNT-NEXT: shrl $2, %eax
+; X86-NOPOPCNT-NEXT: andl $858993459, %eax # imm = 0x33333333
+; X86-NOPOPCNT-NEXT: addl %ecx, %eax
+; X86-NOPOPCNT-NEXT: movl %eax, %ecx
+; X86-NOPOPCNT-NEXT: shrl $4, %ecx
+; X86-NOPOPCNT-NEXT: addl %eax, %ecx
+; X86-NOPOPCNT-NEXT: andl $17764111, %ecx # imm = 0x10F0F0F
+; X86-NOPOPCNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
+; X86-NOPOPCNT-NEXT: shrl $23, %eax
+; X86-NOPOPCNT-NEXT: andl $2, %eax
; X86-NOPOPCNT-NEXT: retl
;
; X64-NOPOPCNT-LABEL: parity_32_shift:
; X64-NOPOPCNT: # %bb.0:
-; X64-NOPOPCNT-NEXT: movl %edi, %ecx
-; X64-NOPOPCNT-NEXT: shrl $16, %ecx
-; X64-NOPOPCNT-NEXT: xorl %edi, %ecx
-; X64-NOPOPCNT-NEXT: xorl %eax, %eax
-; X64-NOPOPCNT-NEXT: xorb %ch, %cl
-; X64-NOPOPCNT-NEXT: setnp %al
-; X64-NOPOPCNT-NEXT: addl %eax, %eax
+; X64-NOPOPCNT-NEXT: movl %edi, %eax
+; X64-NOPOPCNT-NEXT: shrl %eax
+; X64-NOPOPCNT-NEXT: andl $1431655765, %eax # imm = 0x55555555
+; X64-NOPOPCNT-NEXT: subl %eax, %edi
+; X64-NOPOPCNT-NEXT: movl %edi, %eax
+; X64-NOPOPCNT-NEXT: andl $858993459, %eax # imm = 0x33333333
+; X64-NOPOPCNT-NEXT: shrl $2, %edi
+; X64-NOPOPCNT-NEXT: andl $858993459, %edi # imm = 0x33333333
+; X64-NOPOPCNT-NEXT: addl %eax, %edi
+; X64-NOPOPCNT-NEXT: movl %edi, %eax
+; X64-NOPOPCNT-NEXT: shrl $4, %eax
+; X64-NOPOPCNT-NEXT: addl %edi, %eax
+; X64-NOPOPCNT-NEXT: andl $17764111, %eax # imm = 0x10F0F0F
+; X64-NOPOPCNT-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; X64-NOPOPCNT-NEXT: shrl $23, %eax
+; X64-NOPOPCNT-NEXT: andl $2, %eax
; X64-NOPOPCNT-NEXT: retq
;
; X86-POPCNT-LABEL: parity_32_shift:
; X86-POPCNT: # %bb.0:
; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
-; X86-POPCNT-NEXT: andl $1, %eax
; X86-POPCNT-NEXT: addl %eax, %eax
+; X86-POPCNT-NEXT: andl $2, %eax
; X86-POPCNT-NEXT: retl
;
; X64-POPCNT-LABEL: parity_32_shift:
; X64-POPCNT: # %bb.0:
; X64-POPCNT-NEXT: popcntl %edi, %eax
-; X64-POPCNT-NEXT: andl $1, %eax
; X64-POPCNT-NEXT: addl %eax, %eax
+; X64-POPCNT-NEXT: andl $2, %eax
; X64-POPCNT-NEXT: retq
%2 = tail call i32 @llvm.ctpop.i32(i32 %0)
%3 = shl nuw nsw i32 %2, 1
@@ -615,14 +658,22 @@ define i64 @parity_64_shift(i64 %0) {
; X86-NOPOPCNT-LABEL: parity_64_shift:
; X86-NOPOPCNT: # %bb.0:
; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOPOPCNT-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; X86-NOPOPCNT-NEXT: movl %eax, %ecx
-; X86-NOPOPCNT-NEXT: shrl $16, %ecx
-; X86-NOPOPCNT-NEXT: xorl %eax, %ecx
+; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOPOPCNT-NEXT: movl %ecx, %edx
+; X86-NOPOPCNT-NEXT: shrl $16, %edx
+; X86-NOPOPCNT-NEXT: xorl %ecx, %edx
+; X86-NOPOPCNT-NEXT: xorl %ecx, %ecx
+; X86-NOPOPCNT-NEXT: xorb %dh, %dl
+; X86-NOPOPCNT-NEXT: setnp %cl
+; X86-NOPOPCNT-NEXT: movl %eax, %edx
+; X86-NOPOPCNT-NEXT: shrl $16, %edx
+; X86-NOPOPCNT-NEXT: xorl %eax, %edx
; X86-NOPOPCNT-NEXT: xorl %eax, %eax
-; X86-NOPOPCNT-NEXT: xorb %ch, %cl
+; X86-NOPOPCNT-NEXT: xorb %dh, %dl
; X86-NOPOPCNT-NEXT: setnp %al
+; X86-NOPOPCNT-NEXT: addl %ecx, %eax
; X86-NOPOPCNT-NEXT: addl %eax, %eax
+; X86-NOPOPCNT-NEXT: andl $2, %eax
; X86-NOPOPCNT-NEXT: xorl %edx, %edx
; X86-NOPOPCNT-NEXT: retl
;
@@ -637,16 +688,16 @@ define i64 @parity_64_shift(i64 %0) {
; X64-NOPOPCNT-NEXT: xorl %eax, %eax
; X64-NOPOPCNT-NEXT: xorb %ch, %cl
; X64-NOPOPCNT-NEXT: setnp %al
-; X64-NOPOPCNT-NEXT: addq %rax, %rax
+; X64-NOPOPCNT-NEXT: addl %eax, %eax
; X64-NOPOPCNT-NEXT: retq
;
; X86-POPCNT-LABEL: parity_64_shift:
; X86-POPCNT: # %bb.0:
-; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-POPCNT-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; X86-POPCNT-NEXT: popcntl %eax, %eax
-; X86-POPCNT-NEXT: andl $1, %eax
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
+; X86-POPCNT-NEXT: addl %ecx, %eax
; X86-POPCNT-NEXT: addl %eax, %eax
+; X86-POPCNT-NEXT: andl $2, %eax
; X86-POPCNT-NEXT: xorl %edx, %edx
; X86-POPCNT-NEXT: retl
;
@@ -654,7 +705,7 @@ define i64 @parity_64_shift(i64 %0) {
; X64-POPCNT: # %bb.0:
; X64-POPCNT-NEXT: popcntq %rdi, %rax
; X64-POPCNT-NEXT: andl $1, %eax
-; X64-POPCNT-NEXT: addq %rax, %rax
+; X64-POPCNT-NEXT: addl %eax, %eax
; X64-POPCNT-NEXT: retq
%2 = tail call i64 @llvm.ctpop.i64(i64 %0)
%3 = shl nuw nsw i64 %2, 1
diff --git a/llvm/test/CodeGen/X86/pr69965.ll b/llvm/test/CodeGen/X86/pr69965.ll
index fc805e5097c0b85..6fdf7551821d75c 100644
--- a/llvm/test/CodeGen/X86/pr69965.ll
+++ b/llvm/test/CodeGen/X86/pr69965.ll
@@ -7,26 +7,25 @@ define i16 @test(i8 %_in) {
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: notb %al
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: orb $-128, %cl
-; X86-NEXT: movzbl %cl, %ecx
-; X86-NEXT: shll $8, %ecx
-; X86-NEXT: addb %al, %al
+; X86-NEXT: andb $127, %al
+; X86-NEXT: movzbl %al, %ecx
+; X86-NEXT: orb $-128, %al
; X86-NEXT: movzbl %al, %eax
-; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: shll $8, %eax
+; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: notb %dil
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: orb $-128, %al
-; X64-NEXT: movzbl %al, %ecx
-; X64-NEXT: shll $8, %ecx
-; X64-NEXT: addb %dil, %dil
-; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: andb $127, %dil
+; X64-NEXT: leal -128(%rdi), %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: shll $8, %eax
+; X64-NEXT: movzbl %dil, %ecx
+; X64-NEXT: leal (%rax,%rcx,2), %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%_1 = and i8 %_in, 127
diff --git a/llvm/test/CodeGen/X86/setcc.ll b/llvm/test/CodeGen/X86/setcc.ll
index c38318d5f6a253e..29b597670b6f768 100644
--- a/llvm/test/CodeGen/X86/setcc.ll
+++ b/llvm/test/CodeGen/X86/setcc.ll
@@ -76,19 +76,21 @@ define i64 @t3(i64 %x) nounwind readnone ssp {
define i32 @t4(i32 %a) {
; X86-LABEL: t4:
; X86: ## %bb.0:
-; X86-NEXT: movl L_v4$non_lazy_ptr, %ecx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl $1, (%ecx)
-; X86-NEXT: adcw $1, %ax
+; X86-NEXT: movl L_v4$non_lazy_ptr, %eax
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: cmpl $1, (%eax)
+; X86-NEXT: adcw $1, %cx
+; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: shll $16, %eax
; X86-NEXT: retl
;
; X64-LABEL: t4:
; X64: ## %bb.0:
-; X64-NEXT: movq _v4@GOTPCREL(%rip), %rcx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl $1, (%rcx)
-; X64-NEXT: adcw $1, %ax
+; X64-NEXT: movq _v4@GOTPCREL(%rip), %rax
+; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: cmpl $1, (%rax)
+; X64-NEXT: adcw $1, %cx
+; X64-NEXT: movzwl %cx, %eax
; X64-NEXT: shll $16, %eax
; X64-NEXT: retq
%t0 = load i32, ptr @v4, align 4
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
index 8f46209689a1da4..63a6567867763dd 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
@@ -151,7 +151,7 @@ define void @PR46178(ptr %0) {
define <8 x i32> @PR46393(<8 x i16> %a0, i8 %a1) {
; X86-LABEL: PR46393:
; X86: # %bb.0:
-; X86-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; X86-NEXT: vpmovsxwd %xmm0, %ymm0
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpslld $16, %ymm0, %ymm0 {%k1} {z}
@@ -159,7 +159,7 @@ define <8 x i32> @PR46393(<8 x i16> %a0, i8 %a1) {
;
; X64-LABEL: PR46393:
; X64: # %bb.0:
-; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; X64-NEXT: vpmovsxwd %xmm0, %ymm0
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpslld $16, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
>From fd8d5939bae153f0d4a48789643533688c856db1 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Tue, 24 Oct 2023 14:31:38 +0100
Subject: [PATCH 2/2] [DAG] SimplifyDemandedBits - relax AND(CTPOP(X),1) ->
PARITY(X) fold to correctly demand known zero upper bits
If we demand the lowest bit and any mixture of the upper 'known zero' bits of a CTPOP node, then we can still fold to a PARITY node, because the PARITY result still guarantees that those upper bits are zero.
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 15 ++-
llvm/test/CodeGen/X86/parity.ll | 123 +++++-------------
2 files changed, 45 insertions(+), 93 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 75bd4f55b64b040..826f773409cd910 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2263,13 +2263,16 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
case ISD::CTPOP: {
- // If only 1 bit is demanded, replace with PARITY as long as we're before
- // op legalization.
+ // If only bit0 of 'active bits' is demanded, replace with PARITY as long as
+ // we're before op legalization.
// FIXME: Limit to scalars for now.
- if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
- Op.getOperand(0)));
-
+ if (!TLO.LegalOps && !VT.isVector()) {
+ APInt NonZeroMask =
+ APInt::getLowBitsSet(BitWidth, llvm::bit_width(BitWidth));
+ if ((DemandedBits & NonZeroMask).isOne())
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::PARITY, dl, VT, Op.getOperand(0)));
+ }
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
break;
}
diff --git a/llvm/test/CodeGen/X86/parity.ll b/llvm/test/CodeGen/X86/parity.ll
index 1d546b14154da94..cada55919c8cea2 100644
--- a/llvm/test/CodeGen/X86/parity.ll
+++ b/llvm/test/CodeGen/X86/parity.ll
@@ -404,46 +404,21 @@ define i16 @parity_16_mask15(i16 %x) {
define i16 @parity_16_shift(i16 %0) {
; X86-NOPOPCNT-LABEL: parity_16_shift:
; X86-NOPOPCNT: # %bb.0:
-; X86-NOPOPCNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NOPOPCNT-NEXT: movl %eax, %ecx
-; X86-NOPOPCNT-NEXT: shrl %ecx
-; X86-NOPOPCNT-NEXT: andl $21845, %ecx # imm = 0x5555
-; X86-NOPOPCNT-NEXT: subl %ecx, %eax
-; X86-NOPOPCNT-NEXT: movl %eax, %ecx
-; X86-NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333
-; X86-NOPOPCNT-NEXT: shrl $2, %eax
-; X86-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333
-; X86-NOPOPCNT-NEXT: addl %ecx, %eax
-; X86-NOPOPCNT-NEXT: movl %eax, %ecx
-; X86-NOPOPCNT-NEXT: shrl $4, %ecx
-; X86-NOPOPCNT-NEXT: addl %eax, %ecx
-; X86-NOPOPCNT-NEXT: movl %ecx, %eax
-; X86-NOPOPCNT-NEXT: shrl $8, %eax
-; X86-NOPOPCNT-NEXT: addl %ecx, %eax
+; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOPOPCNT-NEXT: xorl %eax, %eax
+; X86-NOPOPCNT-NEXT: xorb %ch, %cl
+; X86-NOPOPCNT-NEXT: setnp %al
; X86-NOPOPCNT-NEXT: addl %eax, %eax
-; X86-NOPOPCNT-NEXT: andl $2, %eax
; X86-NOPOPCNT-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOPOPCNT-NEXT: retl
;
; X64-NOPOPCNT-LABEL: parity_16_shift:
; X64-NOPOPCNT: # %bb.0:
-; X64-NOPOPCNT-NEXT: movl %edi, %eax
-; X64-NOPOPCNT-NEXT: shrl %eax
-; X64-NOPOPCNT-NEXT: andl $21845, %eax # imm = 0x5555
-; X64-NOPOPCNT-NEXT: subl %eax, %edi
-; X64-NOPOPCNT-NEXT: movl %edi, %eax
-; X64-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333
-; X64-NOPOPCNT-NEXT: shrl $2, %edi
-; X64-NOPOPCNT-NEXT: andl $13107, %edi # imm = 0x3333
-; X64-NOPOPCNT-NEXT: addl %edi, %eax
-; X64-NOPOPCNT-NEXT: movl %eax, %ecx
-; X64-NOPOPCNT-NEXT: shrl $4, %ecx
-; X64-NOPOPCNT-NEXT: addl %eax, %ecx
-; X64-NOPOPCNT-NEXT: movl %ecx, %eax
-; X64-NOPOPCNT-NEXT: shrl $8, %eax
-; X64-NOPOPCNT-NEXT: addl %ecx, %eax
+; X64-NOPOPCNT-NEXT: movl %edi, %ecx
+; X64-NOPOPCNT-NEXT: xorl %eax, %eax
+; X64-NOPOPCNT-NEXT: xorb %ch, %cl
+; X64-NOPOPCNT-NEXT: setnp %al
; X64-NOPOPCNT-NEXT: addl %eax, %eax
-; X64-NOPOPCNT-NEXT: andl $2, %eax
; X64-NOPOPCNT-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOPOPCNT-NEXT: retq
;
@@ -451,8 +426,8 @@ define i16 @parity_16_shift(i16 %0) {
; X86-POPCNT: # %bb.0:
; X86-POPCNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT: popcntl %eax, %eax
+; X86-POPCNT-NEXT: andl $1, %eax
; X86-POPCNT-NEXT: addl %eax, %eax
-; X86-POPCNT-NEXT: andl $2, %eax
; X86-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax
; X86-POPCNT-NEXT: retl
;
@@ -460,8 +435,8 @@ define i16 @parity_16_shift(i16 %0) {
; X64-POPCNT: # %bb.0:
; X64-POPCNT-NEXT: movzwl %di, %eax
; X64-POPCNT-NEXT: popcntl %eax, %eax
+; X64-POPCNT-NEXT: andl $1, %eax
; X64-POPCNT-NEXT: addl %eax, %eax
-; X64-POPCNT-NEXT: andl $2, %eax
; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax
; X64-POPCNT-NEXT: retq
%2 = tail call i16 @llvm.ctpop.i16(i16 %0)
@@ -535,55 +510,37 @@ define i32 @parity_32_shift(i32 %0) {
; X86-NOPOPCNT: # %bb.0:
; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOPOPCNT-NEXT: movl %eax, %ecx
-; X86-NOPOPCNT-NEXT: shrl %ecx
-; X86-NOPOPCNT-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; X86-NOPOPCNT-NEXT: subl %ecx, %eax
-; X86-NOPOPCNT-NEXT: movl %eax, %ecx
-; X86-NOPOPCNT-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X86-NOPOPCNT-NEXT: shrl $2, %eax
-; X86-NOPOPCNT-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X86-NOPOPCNT-NEXT: addl %ecx, %eax
-; X86-NOPOPCNT-NEXT: movl %eax, %ecx
-; X86-NOPOPCNT-NEXT: shrl $4, %ecx
-; X86-NOPOPCNT-NEXT: addl %eax, %ecx
-; X86-NOPOPCNT-NEXT: andl $17764111, %ecx # imm = 0x10F0F0F
-; X86-NOPOPCNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
-; X86-NOPOPCNT-NEXT: shrl $23, %eax
-; X86-NOPOPCNT-NEXT: andl $2, %eax
+; X86-NOPOPCNT-NEXT: shrl $16, %ecx
+; X86-NOPOPCNT-NEXT: xorl %eax, %ecx
+; X86-NOPOPCNT-NEXT: xorl %eax, %eax
+; X86-NOPOPCNT-NEXT: xorb %ch, %cl
+; X86-NOPOPCNT-NEXT: setnp %al
+; X86-NOPOPCNT-NEXT: addl %eax, %eax
; X86-NOPOPCNT-NEXT: retl
;
; X64-NOPOPCNT-LABEL: parity_32_shift:
; X64-NOPOPCNT: # %bb.0:
-; X64-NOPOPCNT-NEXT: movl %edi, %eax
-; X64-NOPOPCNT-NEXT: shrl %eax
-; X64-NOPOPCNT-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X64-NOPOPCNT-NEXT: subl %eax, %edi
-; X64-NOPOPCNT-NEXT: movl %edi, %eax
-; X64-NOPOPCNT-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X64-NOPOPCNT-NEXT: shrl $2, %edi
-; X64-NOPOPCNT-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X64-NOPOPCNT-NEXT: addl %eax, %edi
-; X64-NOPOPCNT-NEXT: movl %edi, %eax
-; X64-NOPOPCNT-NEXT: shrl $4, %eax
-; X64-NOPOPCNT-NEXT: addl %edi, %eax
-; X64-NOPOPCNT-NEXT: andl $17764111, %eax # imm = 0x10F0F0F
-; X64-NOPOPCNT-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
-; X64-NOPOPCNT-NEXT: shrl $23, %eax
-; X64-NOPOPCNT-NEXT: andl $2, %eax
+; X64-NOPOPCNT-NEXT: movl %edi, %ecx
+; X64-NOPOPCNT-NEXT: shrl $16, %ecx
+; X64-NOPOPCNT-NEXT: xorl %edi, %ecx
+; X64-NOPOPCNT-NEXT: xorl %eax, %eax
+; X64-NOPOPCNT-NEXT: xorb %ch, %cl
+; X64-NOPOPCNT-NEXT: setnp %al
+; X64-NOPOPCNT-NEXT: addl %eax, %eax
; X64-NOPOPCNT-NEXT: retq
;
; X86-POPCNT-LABEL: parity_32_shift:
; X86-POPCNT: # %bb.0:
; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
+; X86-POPCNT-NEXT: andl $1, %eax
; X86-POPCNT-NEXT: addl %eax, %eax
-; X86-POPCNT-NEXT: andl $2, %eax
; X86-POPCNT-NEXT: retl
;
; X64-POPCNT-LABEL: parity_32_shift:
; X64-POPCNT: # %bb.0:
; X64-POPCNT-NEXT: popcntl %edi, %eax
+; X64-POPCNT-NEXT: andl $1, %eax
; X64-POPCNT-NEXT: addl %eax, %eax
-; X64-POPCNT-NEXT: andl $2, %eax
; X64-POPCNT-NEXT: retq
%2 = tail call i32 @llvm.ctpop.i32(i32 %0)
%3 = shl nuw nsw i32 %2, 1
@@ -658,22 +615,14 @@ define i64 @parity_64_shift(i64 %0) {
; X86-NOPOPCNT-LABEL: parity_64_shift:
; X86-NOPOPCNT: # %bb.0:
; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOPOPCNT-NEXT: movl %ecx, %edx
-; X86-NOPOPCNT-NEXT: shrl $16, %edx
-; X86-NOPOPCNT-NEXT: xorl %ecx, %edx
-; X86-NOPOPCNT-NEXT: xorl %ecx, %ecx
-; X86-NOPOPCNT-NEXT: xorb %dh, %dl
-; X86-NOPOPCNT-NEXT: setnp %cl
-; X86-NOPOPCNT-NEXT: movl %eax, %edx
-; X86-NOPOPCNT-NEXT: shrl $16, %edx
-; X86-NOPOPCNT-NEXT: xorl %eax, %edx
+; X86-NOPOPCNT-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; X86-NOPOPCNT-NEXT: movl %eax, %ecx
+; X86-NOPOPCNT-NEXT: shrl $16, %ecx
+; X86-NOPOPCNT-NEXT: xorl %eax, %ecx
; X86-NOPOPCNT-NEXT: xorl %eax, %eax
-; X86-NOPOPCNT-NEXT: xorb %dh, %dl
+; X86-NOPOPCNT-NEXT: xorb %ch, %cl
; X86-NOPOPCNT-NEXT: setnp %al
-; X86-NOPOPCNT-NEXT: addl %ecx, %eax
; X86-NOPOPCNT-NEXT: addl %eax, %eax
-; X86-NOPOPCNT-NEXT: andl $2, %eax
; X86-NOPOPCNT-NEXT: xorl %edx, %edx
; X86-NOPOPCNT-NEXT: retl
;
@@ -688,16 +637,16 @@ define i64 @parity_64_shift(i64 %0) {
; X64-NOPOPCNT-NEXT: xorl %eax, %eax
; X64-NOPOPCNT-NEXT: xorb %ch, %cl
; X64-NOPOPCNT-NEXT: setnp %al
-; X64-NOPOPCNT-NEXT: addl %eax, %eax
+; X64-NOPOPCNT-NEXT: addq %rax, %rax
; X64-NOPOPCNT-NEXT: retq
;
; X86-POPCNT-LABEL: parity_64_shift:
; X86-POPCNT: # %bb.0:
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
-; X86-POPCNT-NEXT: addl %ecx, %eax
+; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-POPCNT-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; X86-POPCNT-NEXT: popcntl %eax, %eax
+; X86-POPCNT-NEXT: andl $1, %eax
; X86-POPCNT-NEXT: addl %eax, %eax
-; X86-POPCNT-NEXT: andl $2, %eax
; X86-POPCNT-NEXT: xorl %edx, %edx
; X86-POPCNT-NEXT: retl
;
@@ -705,7 +654,7 @@ define i64 @parity_64_shift(i64 %0) {
; X64-POPCNT: # %bb.0:
; X64-POPCNT-NEXT: popcntq %rdi, %rax
; X64-POPCNT-NEXT: andl $1, %eax
-; X64-POPCNT-NEXT: addl %eax, %eax
+; X64-POPCNT-NEXT: addq %rax, %rax
; X64-POPCNT-NEXT: retq
%2 = tail call i64 @llvm.ctpop.i64(i64 %0)
%3 = shl nuw nsw i64 %2, 1
More information about the llvm-commits
mailing list