[llvm] [DAG] SimplifyDemandedBits - ensure we demand the high bits for shl nsw/nuw ops (PR #70041)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 24 06:35:59 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
Matches InstCombinerImpl::SimplifyDemandedUseBits
Fixes #69965
---
Patch is 35.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70041.diff
14 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+21-10)
- (modified) llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll (+3-2)
- (modified) llvm/test/CodeGen/AArch64/load-combine.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/shl.ll (+1)
- (modified) llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll (+2-8)
- (modified) llvm/test/CodeGen/PowerPC/pre-inc-disable.ll (+23-33)
- (modified) llvm/test/CodeGen/RISCV/rv64i-complex-float.ll (+1)
- (modified) llvm/test/CodeGen/RISCV/rvv/pr61561.ll (+3-4)
- (modified) llvm/test/CodeGen/RISCV/split-store.ll (+2)
- (modified) llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll (+170-136)
- (modified) llvm/test/CodeGen/X86/fp128-cast.ll (+7-7)
- (modified) llvm/test/CodeGen/X86/pr69965.ll (+12-13)
- (modified) llvm/test/CodeGen/X86/setcc.ll (+10-8)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll (+2-2)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8b4f3159499122a..826f773409cd910 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1785,14 +1785,22 @@ bool TargetLowering::SimplifyDemandedBits(
}
APInt InDemandedMask = DemandedBits.lshr(ShAmt);
+
+ // If the shift is NUW/NSW, then it does demand the high bits.
+ if (Op->getFlags().hasNoSignedWrap())
+ InDemandedMask.setHighBits(ShAmt + 1);
+ else if (Op->getFlags().hasNoUnsignedWrap())
+ InDemandedMask.setHighBits(ShAmt);
+
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- Known.Zero <<= ShAmt;
- Known.One <<= ShAmt;
- // low bits known zero.
- Known.Zero.setLowBits(ShAmt);
+
+ Known = KnownBits::shl(Known,
+ KnownBits::makeConstant(APInt(BitWidth, ShAmt)),
+ /* NUW */ Op->getFlags().hasNoUnsignedWrap(),
+ /* NSW */ Op->getFlags().hasNoSignedWrap());
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
@@ -2255,13 +2263,16 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
case ISD::CTPOP: {
- // If only 1 bit is demanded, replace with PARITY as long as we're before
- // op legalization.
+ // If only bit0 of 'active bits' is demanded, replace with PARITY as long as
+ // we're before op legalization.
// FIXME: Limit to scalars for now.
- if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
- Op.getOperand(0)));
-
+ if (!TLO.LegalOps && !VT.isVector()) {
+ APInt NonZeroMask =
+ APInt::getLowBitsSet(BitWidth, llvm::bit_width(BitWidth));
+ if ((DemandedBits & NonZeroMask).isOne())
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::PARITY, dl, VT, Op.getOperand(0)));
+ }
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
break;
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll b/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
index da6499b7daa82e5..240c96130d38549 100644
--- a/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
@@ -195,8 +195,9 @@ entry:
define i32 @extendedLeftShiftshortTointBy16(i16 signext %a) nounwind readnone ssp {
; CHECK-LABEL: extendedLeftShiftshortTointBy16:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: lsl w8, w0, #16
-; CHECK-NEXT: add w0, w8, #16, lsl #12 ; =65536
+; CHECK-NEXT: add w8, w0, #1
+; CHECK-NEXT: and w8, w8, #0xffff
+; CHECK-NEXT: lsl w0, w8, #16
; CHECK-NEXT: ret
entry:
%inc = add i16 %a, 1
diff --git a/llvm/test/CodeGen/AArch64/load-combine.ll b/llvm/test/CodeGen/AArch64/load-combine.ll
index 57f61e5303ecf97..099b175cff3fb03 100644
--- a/llvm/test/CodeGen/AArch64/load-combine.ll
+++ b/llvm/test/CodeGen/AArch64/load-combine.ll
@@ -578,7 +578,7 @@ define void @short_vector_to_i32_unused_low_i8(ptr %in, ptr %out, ptr %p) {
; CHECK-NEXT: umov w10, v0.h[3]
; CHECK-NEXT: lsl w8, w8, #16
; CHECK-NEXT: bfi w8, w9, #8, #8
-; CHECK-NEXT: orr w8, w8, w10, lsl #24
+; CHECK-NEXT: bfi w8, w10, #24, #8
; CHECK-NEXT: str w8, [x1]
; CHECK-NEXT: ret
%ld = load <4 x i8>, ptr %in, align 4
@@ -609,8 +609,8 @@ define void @short_vector_to_i32_unused_high_i8(ptr %in, ptr %out, ptr %p) {
; CHECK-NEXT: ldrh w9, [x0]
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: umov w8, v0.h[2]
-; CHECK-NEXT: orr w8, w9, w8, lsl #16
-; CHECK-NEXT: str w8, [x1]
+; CHECK-NEXT: bfi w9, w8, #16, #8
+; CHECK-NEXT: str w9, [x1]
; CHECK-NEXT: ret
%ld = load <4 x i8>, ptr %in, align 4
@@ -640,7 +640,7 @@ define void @short_vector_to_i32_unused_low_i16(ptr %in, ptr %out, ptr %p) {
; CHECK-NEXT: umov w8, v0.h[3]
; CHECK-NEXT: umov w9, v0.h[2]
; CHECK-NEXT: lsl w8, w8, #24
-; CHECK-NEXT: orr w8, w8, w9, lsl #16
+; CHECK-NEXT: bfi w8, w9, #16, #8
; CHECK-NEXT: str w8, [x1]
; CHECK-NEXT: ret
%ld = load <4 x i8>, ptr %in, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll
index be0aa394dd99dc0..65fcb9665ce820c 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.ll
@@ -489,6 +489,7 @@ define amdgpu_kernel void @shl_i16_i_s(ptr addrspace(1) %out, i16 zeroext %a) {
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_and_b32 s4, s4, 15
; VI-NEXT: s_lshl_b32 s4, s4, 12
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
index b6e7da97e008905..2afbb2e4c9fb734 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
@@ -53,9 +53,7 @@ define amdgpu_gfx void @strict_wwm_no_cfg(ptr addrspace(8) inreg %tmp14) {
; GFX9-O0-NEXT: s_mov_b64 exec, s[40:41]
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0
; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[40:41], v3, v4
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[40:41]
-; GFX9-O0-NEXT: s_mov_b32 s35, 1
-; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s35, v3
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[40:41]
; GFX9-O0-NEXT: s_mov_b32 s35, 2
; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s35
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[36:39], s34 offset:4
@@ -101,7 +99,6 @@ define amdgpu_gfx void @strict_wwm_no_cfg(ptr addrspace(8) inreg %tmp14) {
; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5
; GFX9-O3-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX9-O3-NEXT: v_lshlrev_b32_e32 v4, 1, v4
-; GFX9-O3-NEXT: v_and_b32_e32 v4, 2, v4
; GFX9-O3-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:4
; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1
; GFX9-O3-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
@@ -235,9 +232,7 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
; GFX9-O0-NEXT: v_readlane_b32 s35, v0, 3
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[36:37], v3, v4
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[36:37]
-; GFX9-O0-NEXT: s_mov_b32 s36, 1
-; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s36, v3
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[36:37]
; GFX9-O0-NEXT: s_mov_b32 s36, 2
; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s36
; GFX9-O0-NEXT: s_mov_b32 s40, s35
@@ -302,7 +297,6 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
; GFX9-O3-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-O3-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX9-O3-NEXT: v_and_b32_e32 v0, 2, v0
; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4
; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1
; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
index 4da36c9af5c101c..426dd1d8e596af1 100644
--- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -19,19 +19,16 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9LE-LABEL: test64:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: add 5, 3, 4
-; P9LE-NEXT: lfdx 0, 3, 4
+; P9LE-NEXT: lxsdx 2, 3, 4
; P9LE-NEXT: addis 3, 2, .LCPI0_0 at toc@ha
-; P9LE-NEXT: xxlxor 2, 2, 2
+; P9LE-NEXT: xxlxor 1, 1, 1
; P9LE-NEXT: vspltisw 4, 8
; P9LE-NEXT: lxsd 3, 4(5)
; P9LE-NEXT: addi 3, 3, .LCPI0_0 at toc@l
; P9LE-NEXT: vadduwm 4, 4, 4
-; P9LE-NEXT: lxv 1, 0(3)
-; P9LE-NEXT: addis 3, 2, .LCPI0_1 at toc@ha
-; P9LE-NEXT: addi 3, 3, .LCPI0_1 at toc@l
-; P9LE-NEXT: xxperm 2, 0, 1
; P9LE-NEXT: lxv 0, 0(3)
-; P9LE-NEXT: xxperm 3, 3, 0
+; P9LE-NEXT: xxperm 3, 1, 0
+; P9LE-NEXT: xxperm 2, 1, 0
; P9LE-NEXT: vnegw 3, 3
; P9LE-NEXT: vslw 3, 3, 4
; P9LE-NEXT: vsubuwm 2, 3, 2
@@ -50,11 +47,8 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-NEXT: addi 3, 3, .LCPI0_0 at toc@l
; P9BE-NEXT: vadduwm 4, 4, 4
; P9BE-NEXT: lxv 0, 0(3)
-; P9BE-NEXT: addis 3, 2, .LCPI0_1 at toc@ha
-; P9BE-NEXT: addi 3, 3, .LCPI0_1 at toc@l
+; P9BE-NEXT: xxperm 3, 1, 0
; P9BE-NEXT: xxperm 2, 1, 0
-; P9BE-NEXT: lxv 0, 0(3)
-; P9BE-NEXT: xxperm 3, 3, 0
; P9BE-NEXT: vnegw 3, 3
; P9BE-NEXT: vslw 3, 3, 4
; P9BE-NEXT: vsubuwm 2, 3, 2
@@ -71,11 +65,9 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX-NEXT: vspltisw 4, 8
; P9BE-AIX-NEXT: lxsd 3, 4(5)
; P9BE-AIX-NEXT: lxv 0, 0(3)
-; P9BE-AIX-NEXT: ld 3, L..C1(2) # %const.1
; P9BE-AIX-NEXT: vadduwm 4, 4, 4
+; P9BE-AIX-NEXT: xxperm 3, 1, 0
; P9BE-AIX-NEXT: xxperm 2, 1, 0
-; P9BE-AIX-NEXT: lxv 0, 0(3)
-; P9BE-AIX-NEXT: xxperm 3, 3, 0
; P9BE-AIX-NEXT: vnegw 3, 3
; P9BE-AIX-NEXT: vslw 3, 3, 4
; P9BE-AIX-NEXT: vsubuwm 2, 3, 2
@@ -86,25 +78,23 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX32-LABEL: test64:
; P9BE-AIX32: # %bb.0: # %entry
; P9BE-AIX32-NEXT: lwzux 4, 3, 4
-; P9BE-AIX32-NEXT: xxlxor 2, 2, 2
; P9BE-AIX32-NEXT: vspltisw 4, 8
-; P9BE-AIX32-NEXT: stw 4, -48(1)
; P9BE-AIX32-NEXT: vadduwm 4, 4, 4
+; P9BE-AIX32-NEXT: stw 4, -48(1)
; P9BE-AIX32-NEXT: lwz 4, 4(3)
; P9BE-AIX32-NEXT: lxv 0, -48(1)
; P9BE-AIX32-NEXT: stw 4, -32(1)
; P9BE-AIX32-NEXT: lwz 4, L..C0(2) # %const.0
-; P9BE-AIX32-NEXT: lxv 1, -32(1)
; P9BE-AIX32-NEXT: lwz 3, 8(3)
+; P9BE-AIX32-NEXT: lxv 1, -32(1)
; P9BE-AIX32-NEXT: stw 3, -16(1)
-; P9BE-AIX32-NEXT: lwz 3, L..C1(2) # %const.1
+; P9BE-AIX32-NEXT: lxv 2, 0(4)
+; P9BE-AIX32-NEXT: lxv 3, -16(1)
; P9BE-AIX32-NEXT: xxmrghw 2, 0, 1
-; P9BE-AIX32-NEXT: lxv 0, 0(4)
-; P9BE-AIX32-NEXT: xxperm 2, 2, 0
-; P9BE-AIX32-NEXT: lxv 0, -16(1)
-; P9BE-AIX32-NEXT: xxmrghw 3, 1, 0
-; P9BE-AIX32-NEXT: lxv 0, 0(3)
-; P9BE-AIX32-NEXT: xxperm 3, 3, 0
+; P9BE-AIX32-NEXT: xxlxor 0, 0, 0
+; P9BE-AIX32-NEXT: xxperm 2, 0, 2
+; P9BE-AIX32-NEXT: xxmrghw 3, 1, 3
+; P9BE-AIX32-NEXT: xxperm 3, 0, 2
; P9BE-AIX32-NEXT: vnegw 3, 3
; P9BE-AIX32-NEXT: vslw 3, 3, 4
; P9BE-AIX32-NEXT: vsubuwm 2, 3, 2
@@ -180,7 +170,7 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX: # %bb.0: # %entry
; P9BE-AIX-NEXT: add 5, 3, 4
; P9BE-AIX-NEXT: lxsiwzx 2, 3, 4
-; P9BE-AIX-NEXT: ld 3, L..C2(2) # %const.0
+; P9BE-AIX-NEXT: ld 3, L..C1(2) # %const.0
; P9BE-AIX-NEXT: xxlxor 0, 0, 0
; P9BE-AIX-NEXT: vspltisw 4, 8
; P9BE-AIX-NEXT: lxv 1, 0(3)
@@ -200,7 +190,7 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX32: # %bb.0: # %entry
; P9BE-AIX32-NEXT: add 5, 3, 4
; P9BE-AIX32-NEXT: lxsiwzx 2, 3, 4
-; P9BE-AIX32-NEXT: lwz 3, L..C2(2) # %const.0
+; P9BE-AIX32-NEXT: lwz 3, L..C1(2) # %const.0
; P9BE-AIX32-NEXT: xxlxor 0, 0, 0
; P9BE-AIX32-NEXT: vspltisw 4, 8
; P9BE-AIX32-NEXT: lxv 1, 0(3)
@@ -297,9 +287,9 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
; P9BE-AIX-NEXT: li 7, 16
; P9BE-AIX-NEXT: add 6, 3, 4
; P9BE-AIX-NEXT: lxsihzx 1, 3, 4
-; P9BE-AIX-NEXT: ld 3, L..C3(2) # %const.1
+; P9BE-AIX-NEXT: ld 3, L..C2(2) # %const.1
; P9BE-AIX-NEXT: lxsihzx 2, 6, 7
-; P9BE-AIX-NEXT: ld 6, L..C4(2) # %const.0
+; P9BE-AIX-NEXT: ld 6, L..C3(2) # %const.0
; P9BE-AIX-NEXT: lxv 0, 0(6)
; P9BE-AIX-NEXT: li 6, 0
; P9BE-AIX-NEXT: mtvsrwz 3, 6
@@ -328,7 +318,7 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
; P9BE-AIX32-NEXT: sth 4, -48(1)
; P9BE-AIX32-NEXT: lxv 4, -48(1)
; P9BE-AIX32-NEXT: sth 3, -32(1)
-; P9BE-AIX32-NEXT: lwz 3, L..C3(2) # %const.0
+; P9BE-AIX32-NEXT: lwz 3, L..C2(2) # %const.0
; P9BE-AIX32-NEXT: lxv 3, -32(1)
; P9BE-AIX32-NEXT: vmrghh 4, 2, 4
; P9BE-AIX32-NEXT: lxv 0, 0(3)
@@ -437,9 +427,9 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
; P9BE-AIX-NEXT: add 6, 3, 4
; P9BE-AIX-NEXT: li 7, 8
; P9BE-AIX-NEXT: lxsibzx 3, 3, 4
-; P9BE-AIX-NEXT: ld 3, L..C5(2) # %const.1
+; P9BE-AIX-NEXT: ld 3, L..C4(2) # %const.1
; P9BE-AIX-NEXT: lxsibzx 0, 6, 7
-; P9BE-AIX-NEXT: ld 6, L..C6(2) # %const.0
+; P9BE-AIX-NEXT: ld 6, L..C5(2) # %const.0
; P9BE-AIX-NEXT: lxv 1, 0(6)
; P9BE-AIX-NEXT: li 6, 0
; P9BE-AIX-NEXT: mtvsrwz 2, 6
@@ -464,9 +454,9 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
; P9BE-AIX32-NEXT: add 6, 3, 4
; P9BE-AIX32-NEXT: li 7, 8
; P9BE-AIX32-NEXT: lxsibzx 3, 3, 4
-; P9BE-AIX32-NEXT: lwz 3, L..C4(2) # %const.1
+; P9BE-AIX32-NEXT: lwz 3, L..C3(2) # %const.1
; P9BE-AIX32-NEXT: lxsibzx 0, 6, 7
-; P9BE-AIX32-NEXT: lwz 6, L..C5(2) # %const.0
+; P9BE-AIX32-NEXT: lwz 6, L..C4(2) # %const.0
; P9BE-AIX32-NEXT: lxv 1, 0(6)
; P9BE-AIX32-NEXT: li 6, 0
; P9BE-AIX32-NEXT: mtvsrwz 2, 6
diff --git a/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll b/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll
index 690828c7794346e..6e4f624415d9983 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll
@@ -20,6 +20,7 @@ define i64 @complex_float_add(i64 %a.coerce, i64 %b.coerce) nounwind {
; CHECK-NEXT: mv a0, s0
; CHECK-NEXT: mv a1, s1
; CHECK-NEXT: call __addsf3 at plt
+; CHECK-NEXT: andi a0, a0, -1
; CHECK-NEXT: slli a0, a0, 32
; CHECK-NEXT: slli s2, s2, 32
; CHECK-NEXT: srli a1, s2, 32
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr61561.ll b/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
index f27edd36116657e..8d246d99388193f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
@@ -5,12 +5,11 @@ define <vscale x 4 x i8> @foo(ptr %p) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: vl1re16.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vsll.vi v8, v8, 3
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
+; CHECK-NEXT: vsll.vi v8, v10, 3
; CHECK-NEXT: li a0, 248
-; CHECK-NEXT: vand.vx v8, v10, a0
+; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: lui a0, 4
; CHECK-NEXT: vmv.v.x v10, a0
; CHECK-NEXT: lui a0, 1
diff --git a/llvm/test/CodeGen/RISCV/split-store.ll b/llvm/test/CodeGen/RISCV/split-store.ll
index 367d3fe2c595fea..afc72d2b8ab7016 100644
--- a/llvm/test/CodeGen/RISCV/split-store.ll
+++ b/llvm/test/CodeGen/RISCV/split-store.ll
@@ -129,6 +129,7 @@ define void @int32_int32_pair(i32 %tmp1, i32 %tmp2, ptr %ref.tmp) {
;
; RV64-LABEL: int32_int32_pair:
; RV64: # %bb.0:
+; RV64-NEXT: andi a1, a1, -1
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: srli a0, a0, 32
@@ -138,6 +139,7 @@ define void @int32_int32_pair(i32 %tmp1, i32 %tmp2, ptr %ref.tmp) {
;
; RV64D-LABEL: int32_int32_pair:
; RV64D: # %bb.0:
+; RV64D-NEXT: andi a1, a1, -1
; RV64D-NEXT: slli a1, a1, 32
; RV64D-NEXT: slli a0, a0, 32
; RV64D-NEXT: srli a0, a0, 32
diff --git a/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll b/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
index fdfbf3393098e4a..40e1398b6f10fe1 100644
--- a/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
+++ b/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
@@ -20,18 +20,19 @@ define void @arm_q15_to_q31(ptr nocapture noundef readonly %pSrc, ptr nocapture
; CHECK-NEXT: @ %bb.2: @ %while.body.prol
; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: ldrh r2, [r0]
-; CHECK-NEXT: ldrh r7, [r0, #2]
-; CHECK-NEXT: ldrh r4, [r0, #4]
-; CHECK-NEXT: ldrh r6, [r0, #6]
+; CHECK-NEXT: movs r6, #2
+; CHECK-NEXT: ldrsh r6, [r0, r6]
+; CHECK-NEXT: movs r7, #6
+; CHECK-NEXT: ldrsh r7, [r0, r7]
+; CHECK-NEXT: lsls r2, r7, #16
+; CHECK-NEXT: ldrh r4, [r0]
+; CHECK-NEXT: ldrh r7, [r0, #4]
+; CHECK-NEXT: lsls r7, r7, #16
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: lsls r4, r4, #16
-; CHECK-NEXT: lsls r7, r7, #16
-; CHECK-NEXT: lsls r2, r2, #16
-; CHECK-NEXT: stm r1!, {r2, r7}
-; CHECK-NEXT: str r4, [r1]
-; CHECK-NEXT: str r6, [r1, #4]
-; CHECK-NEXT: subs r1, #8
+; CHECK-NEXT: stm r1!, {r4, r6, r7}
+; CHECK-NEXT: str r2, [r1]
+; CHECK-NEXT: subs r1, #12
; CHECK-NEXT: cmp r5, #1
; CHECK-NEXT: bne .LBB0_11
; CHECK-NEXT: @ %bb.3:
@@ -45,53 +46,61 @@ define void @arm_q15_to_q31(ptr nocapture noundef readonly %pSrc, ptr nocapture
; CHECK-NEXT: blo .LBB0_6
; CHECK-NEXT: .LBB0_5: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldrh r2, [r0]
-; CHECK-NEXT: ldrh r4, [r0, #2]
-; CHECK-NEXT: ldrh r5, [r0, #4]
-; CHECK-NEXT: ldrh r6, [r0, #6]
-; CHECK-NEXT: lsls r6, r6, #16
-; CHECK-NEXT: str r6, [r1, #12]
-; CHECK-NEXT: lsls r5, r5, #16
-; CHECK-NEXT: str r5, [r1, #8]
+; CHECK-NEXT: movs r2, #2
+; CHECK-NEXT: ldrsh r2, [r0, r2]
+; CHECK-NEXT: movs r4, #6
+; CHECK-NEXT: ldrsh r4, [r0, r4]
; CHECK-NEXT: lsls r4, r4, #16
-; CHECK-NEXT: str r4, [r1, #4]
+; CHECK-NEXT: ldrh r5, [r0]
+; CHECK-NEXT: ldrh r6, [r0, #4]
+; CHECK-NEXT: str r4, [r1, #12]
+; CHECK-NEXT: lsls r4, r6, #16
+; CHECK-NEXT: str r4, [r1, #8]
; CHECK-NEXT: lsls r2, r2, #16
+; CHECK-NEXT: str r2, [r1, #4]
+; CHECK-NEXT: lsls r2, r5, #16
; CHECK-NEXT: str r2, [r1]
-; CHECK-NEXT: ldrh r2, [r0, #8]
-; CHECK-NEXT: ldrh r4, [r0, #10]
-; CHECK-NEXT: ldrh r5, [r0, #12]
-; CHECK-NEXT: ldrh r6, [r0, #14]
-; CHECK-NEXT: lsls r6, r6, #16
-; CHECK-NEXT: str r6, [r1, #28]
-; CHECK-NEXT: lsls r5, r5, #16
-; CHECK-NEXT: str r5, [r1, #24]
+; CHECK-NEXT: movs r2, #10
+; CHECK-NEXT: ldrsh r2, [r0, r2]
+; CHECK-NEXT: movs r4, #14
+; CHECK-NEXT: ldrsh r4, [r0, r4]
; CHECK-NEXT: lsls r4, r4, #16
-; CHECK-NEXT: str r4, [r1, #20]
+; CHECK-NEXT: ldrh r5, [r0, #8]
+; CHECK-NEXT: ldrh r6, [r0, #12]
+; CHECK-NEXT: str r4, [r1, #28]
+; CHECK-NEXT: lsls r4, r6, #16
+; CHECK-NEXT: str r4, [r1, #24]
; CHECK-NEXT: lsls r2, r2, #16
+; CHECK-NEXT: str r2, [r1, #20]
+; CHECK-NEXT: lsls r2, r5, #16
; CHECK-NEXT: str r2, [r1, #16]
-; CHECK-NEXT: ldrh r2, [r0, #16]
-; CHECK-NEXT: ldrh r4, [r0, #18]
-; CHECK-NEXT: ldrh r5, [r0, #20]
-; CHECK-NEXT: ldrh r6, [r0, #22]
-; CHECK-NEXT: lsls r6, r6, #16
-; CHECK-NEXT: str r6, [r1, #44]
-; CHECK-NEXT: lsls r5, r5, #16
-; CHECK-NEXT: str r5, [r1, #40]
+; CHECK-NEXT: movs r2, #18
+; CHECK-NEXT: ldrsh r2, [r0, r2]
+; CHECK-NEXT: movs r4, #22
+; CHECK-NEXT: ldrsh r4, [r0, r4]
; CHECK-NEXT: lsls r4, r4, #16
-; CHECK-NEXT: str r4, [r1, #36]
+; CHECK-NEXT: ldrh r5, [r0, #16]
+; CHECK-NEXT: ldrh r6, [r0, #20]
+; CHECK-NEXT: str r4, [r1, #44]
+; CHECK-NEXT: lsls r4, r6, #16
+; CHECK-NEXT: str r4, [r1, #40]
; CHECK-NEXT: lsls r2, r2, #16
+; CHECK-NEXT: str r2, [r1, #36]
+; CHECK-NEXT: lsls r2, r5, #16
; CHECK-NEXT: str r2, [r1, #32]
-; CHECK-NEXT: ldrh r2, [r0, #24]
-; CHECK-NEXT: ldrh r4, [r0, #26]
-; CHECK-NEXT: ldrh r5, [r0, #28]
-; CHECK-NEXT: ldrh r6, [r0, #30]
-; CHECK-NEXT: lsls r6, r6, #16
-; CHECK-NEXT: str r6, [r1, #60]
-; CHECK-NEXT: lsls r5, r5, #16
-; CHECK-NEXT: str r5, [r1, #56]
+; CHECK-NEXT: movs r2, #26
+; CHECK-NEXT: ldrsh r2, [r0, r2]
+; CHECK-NEXT: movs r4, #30
+; CHECK-NEXT: ldrsh r4, [r0, r4]
; CHECK-NEXT: lsls r4, r4, #16
-; CHECK-NEXT: str r4, [r1, #52]
+; CHECK-NEXT: ldr...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/70041
More information about the llvm-commits
mailing list