[llvm] [DAG] SimplifyDemandedBits - ensure we demand the high bits for shl nsw/nuw ops (PR #70041)

via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 24 06:35:59 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Simon Pilgrim (RKSimon)

<details>
<summary>Changes</summary>

Matches InstCombinerImpl::SimplifyDemandedUseBits

Fixes #<!-- -->69965  

---

Patch is 35.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70041.diff


14 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+21-10) 
- (modified) llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll (+3-2) 
- (modified) llvm/test/CodeGen/AArch64/load-combine.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/shl.ll (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll (+2-8) 
- (modified) llvm/test/CodeGen/PowerPC/pre-inc-disable.ll (+23-33) 
- (modified) llvm/test/CodeGen/RISCV/rv64i-complex-float.ll (+1) 
- (modified) llvm/test/CodeGen/RISCV/rvv/pr61561.ll (+3-4) 
- (modified) llvm/test/CodeGen/RISCV/split-store.ll (+2) 
- (modified) llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll (+170-136) 
- (modified) llvm/test/CodeGen/X86/fp128-cast.ll (+7-7) 
- (modified) llvm/test/CodeGen/X86/pr69965.ll (+12-13) 
- (modified) llvm/test/CodeGen/X86/setcc.ll (+10-8) 
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll (+2-2) 


``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8b4f3159499122a..826f773409cd910 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1785,14 +1785,22 @@ bool TargetLowering::SimplifyDemandedBits(
       }
 
       APInt InDemandedMask = DemandedBits.lshr(ShAmt);
+
+      // If the shift is NUW/NSW, then it does demand the high bits.
+      if (Op->getFlags().hasNoSignedWrap())
+        InDemandedMask.setHighBits(ShAmt + 1);
+      else if (Op->getFlags().hasNoUnsignedWrap())
+        InDemandedMask.setHighBits(ShAmt);
+
       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                                Depth + 1))
         return true;
       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
-      Known.Zero <<= ShAmt;
-      Known.One <<= ShAmt;
-      // low bits known zero.
-      Known.Zero.setLowBits(ShAmt);
+
+      Known = KnownBits::shl(Known,
+                             KnownBits::makeConstant(APInt(BitWidth, ShAmt)),
+                             /* NUW */ Op->getFlags().hasNoUnsignedWrap(),
+                             /* NSW */ Op->getFlags().hasNoSignedWrap());
 
       // Attempt to avoid multi-use ops if we don't need anything from them.
       if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
@@ -2255,13 +2263,16 @@ bool TargetLowering::SimplifyDemandedBits(
     break;
   }
   case ISD::CTPOP: {
-    // If only 1 bit is demanded, replace with PARITY as long as we're before
-    // op legalization.
+    // If only bit0 of 'active bits' is demanded, replace with PARITY as long as
+    // we're before op legalization.
     // FIXME: Limit to scalars for now.
-    if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
-      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
-                                               Op.getOperand(0)));
-
+    if (!TLO.LegalOps && !VT.isVector()) {
+      APInt NonZeroMask =
+          APInt::getLowBitsSet(BitWidth, llvm::bit_width(BitWidth));
+      if ((DemandedBits & NonZeroMask).isOne())
+        return TLO.CombineTo(
+            Op, TLO.DAG.getNode(ISD::PARITY, dl, VT, Op.getOperand(0)));
+    }
     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
     break;
   }
diff --git a/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll b/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
index da6499b7daa82e5..240c96130d38549 100644
--- a/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
@@ -195,8 +195,9 @@ entry:
 define i32 @extendedLeftShiftshortTointBy16(i16 signext %a) nounwind readnone ssp {
 ; CHECK-LABEL: extendedLeftShiftshortTointBy16:
 ; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    lsl w8, w0, #16
-; CHECK-NEXT:    add w0, w8, #16, lsl #12 ; =65536
+; CHECK-NEXT:    add w8, w0, #1
+; CHECK-NEXT:    and w8, w8, #0xffff
+; CHECK-NEXT:    lsl w0, w8, #16
 ; CHECK-NEXT:    ret
 entry:
   %inc = add i16 %a, 1
diff --git a/llvm/test/CodeGen/AArch64/load-combine.ll b/llvm/test/CodeGen/AArch64/load-combine.ll
index 57f61e5303ecf97..099b175cff3fb03 100644
--- a/llvm/test/CodeGen/AArch64/load-combine.ll
+++ b/llvm/test/CodeGen/AArch64/load-combine.ll
@@ -578,7 +578,7 @@ define void @short_vector_to_i32_unused_low_i8(ptr %in, ptr %out, ptr %p) {
 ; CHECK-NEXT:    umov w10, v0.h[3]
 ; CHECK-NEXT:    lsl w8, w8, #16
 ; CHECK-NEXT:    bfi w8, w9, #8, #8
-; CHECK-NEXT:    orr w8, w8, w10, lsl #24
+; CHECK-NEXT:    bfi w8, w10, #24, #8
 ; CHECK-NEXT:    str w8, [x1]
 ; CHECK-NEXT:    ret
   %ld = load <4 x i8>, ptr %in, align 4
@@ -609,8 +609,8 @@ define void @short_vector_to_i32_unused_high_i8(ptr %in, ptr %out, ptr %p) {
 ; CHECK-NEXT:    ldrh w9, [x0]
 ; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
 ; CHECK-NEXT:    umov w8, v0.h[2]
-; CHECK-NEXT:    orr w8, w9, w8, lsl #16
-; CHECK-NEXT:    str w8, [x1]
+; CHECK-NEXT:    bfi w9, w8, #16, #8
+; CHECK-NEXT:    str w9, [x1]
 ; CHECK-NEXT:    ret
   %ld = load <4 x i8>, ptr %in, align 4
 
@@ -640,7 +640,7 @@ define void @short_vector_to_i32_unused_low_i16(ptr %in, ptr %out, ptr %p) {
 ; CHECK-NEXT:    umov w8, v0.h[3]
 ; CHECK-NEXT:    umov w9, v0.h[2]
 ; CHECK-NEXT:    lsl w8, w8, #24
-; CHECK-NEXT:    orr w8, w8, w9, lsl #16
+; CHECK-NEXT:    bfi w8, w9, #16, #8
 ; CHECK-NEXT:    str w8, [x1]
 ; CHECK-NEXT:    ret
   %ld = load <4 x i8>, ptr %in, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll
index be0aa394dd99dc0..65fcb9665ce820c 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.ll
@@ -489,6 +489,7 @@ define amdgpu_kernel void @shl_i16_i_s(ptr addrspace(1) %out, i16 zeroext %a) {
 ; VI-NEXT:    s_mov_b32 s3, 0xf000
 ; VI-NEXT:    s_mov_b32 s2, -1
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_and_b32 s4, s4, 15
 ; VI-NEXT:    s_lshl_b32 s4, s4, 12
 ; VI-NEXT:    v_mov_b32_e32 v0, s4
 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
index b6e7da97e008905..2afbb2e4c9fb734 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
@@ -53,9 +53,7 @@ define amdgpu_gfx void @strict_wwm_no_cfg(ptr addrspace(8) inreg %tmp14) {
 ; GFX9-O0-NEXT:    s_mov_b64 exec, s[40:41]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v0
 ; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[40:41], v3, v4
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s[40:41]
-; GFX9-O0-NEXT:    s_mov_b32 s35, 1
-; GFX9-O0-NEXT:    v_lshlrev_b32_e64 v3, s35, v3
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[40:41]
 ; GFX9-O0-NEXT:    s_mov_b32 s35, 2
 ; GFX9-O0-NEXT:    v_and_b32_e64 v3, v3, s35
 ; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[36:39], s34 offset:4
@@ -101,7 +99,6 @@ define amdgpu_gfx void @strict_wwm_no_cfg(ptr addrspace(8) inreg %tmp14) {
 ; GFX9-O3-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v5
 ; GFX9-O3-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; GFX9-O3-NEXT:    v_lshlrev_b32_e32 v4, 1, v4
-; GFX9-O3-NEXT:    v_and_b32_e32 v4, 2, v4
 ; GFX9-O3-NEXT:    buffer_store_dword v4, off, s[4:7], 0 offset:4
 ; GFX9-O3-NEXT:    s_xor_saveexec_b64 s[34:35], -1
 ; GFX9-O3-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
@@ -235,9 +232,7 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
 ; GFX9-O0-NEXT:    v_readlane_b32 s35, v0, 3
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[36:37], v3, v4
-; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s[36:37]
-; GFX9-O0-NEXT:    s_mov_b32 s36, 1
-; GFX9-O0-NEXT:    v_lshlrev_b32_e64 v3, s36, v3
+; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[36:37]
 ; GFX9-O0-NEXT:    s_mov_b32 s36, 2
 ; GFX9-O0-NEXT:    v_and_b32_e64 v3, v3, s36
 ; GFX9-O0-NEXT:    s_mov_b32 s40, s35
@@ -302,7 +297,6 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
 ; GFX9-O3-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
 ; GFX9-O3-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX9-O3-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
-; GFX9-O3-NEXT:    v_and_b32_e32 v0, 2, v0
 ; GFX9-O3-NEXT:    buffer_store_dword v0, off, s[4:7], 0 offset:4
 ; GFX9-O3-NEXT:    s_xor_saveexec_b64 s[34:35], -1
 ; GFX9-O3-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
index 4da36c9af5c101c..426dd1d8e596af1 100644
--- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -19,19 +19,16 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9LE-LABEL: test64:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    add 5, 3, 4
-; P9LE-NEXT:    lfdx 0, 3, 4
+; P9LE-NEXT:    lxsdx 2, 3, 4
 ; P9LE-NEXT:    addis 3, 2, .LCPI0_0 at toc@ha
-; P9LE-NEXT:    xxlxor 2, 2, 2
+; P9LE-NEXT:    xxlxor 1, 1, 1
 ; P9LE-NEXT:    vspltisw 4, 8
 ; P9LE-NEXT:    lxsd 3, 4(5)
 ; P9LE-NEXT:    addi 3, 3, .LCPI0_0 at toc@l
 ; P9LE-NEXT:    vadduwm 4, 4, 4
-; P9LE-NEXT:    lxv 1, 0(3)
-; P9LE-NEXT:    addis 3, 2, .LCPI0_1 at toc@ha
-; P9LE-NEXT:    addi 3, 3, .LCPI0_1 at toc@l
-; P9LE-NEXT:    xxperm 2, 0, 1
 ; P9LE-NEXT:    lxv 0, 0(3)
-; P9LE-NEXT:    xxperm 3, 3, 0
+; P9LE-NEXT:    xxperm 3, 1, 0
+; P9LE-NEXT:    xxperm 2, 1, 0
 ; P9LE-NEXT:    vnegw 3, 3
 ; P9LE-NEXT:    vslw 3, 3, 4
 ; P9LE-NEXT:    vsubuwm 2, 3, 2
@@ -50,11 +47,8 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9BE-NEXT:    addi 3, 3, .LCPI0_0 at toc@l
 ; P9BE-NEXT:    vadduwm 4, 4, 4
 ; P9BE-NEXT:    lxv 0, 0(3)
-; P9BE-NEXT:    addis 3, 2, .LCPI0_1 at toc@ha
-; P9BE-NEXT:    addi 3, 3, .LCPI0_1 at toc@l
+; P9BE-NEXT:    xxperm 3, 1, 0
 ; P9BE-NEXT:    xxperm 2, 1, 0
-; P9BE-NEXT:    lxv 0, 0(3)
-; P9BE-NEXT:    xxperm 3, 3, 0
 ; P9BE-NEXT:    vnegw 3, 3
 ; P9BE-NEXT:    vslw 3, 3, 4
 ; P9BE-NEXT:    vsubuwm 2, 3, 2
@@ -71,11 +65,9 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9BE-AIX-NEXT:    vspltisw 4, 8
 ; P9BE-AIX-NEXT:    lxsd 3, 4(5)
 ; P9BE-AIX-NEXT:    lxv 0, 0(3)
-; P9BE-AIX-NEXT:    ld 3, L..C1(2) # %const.1
 ; P9BE-AIX-NEXT:    vadduwm 4, 4, 4
+; P9BE-AIX-NEXT:    xxperm 3, 1, 0
 ; P9BE-AIX-NEXT:    xxperm 2, 1, 0
-; P9BE-AIX-NEXT:    lxv 0, 0(3)
-; P9BE-AIX-NEXT:    xxperm 3, 3, 0
 ; P9BE-AIX-NEXT:    vnegw 3, 3
 ; P9BE-AIX-NEXT:    vslw 3, 3, 4
 ; P9BE-AIX-NEXT:    vsubuwm 2, 3, 2
@@ -86,25 +78,23 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9BE-AIX32-LABEL: test64:
 ; P9BE-AIX32:       # %bb.0: # %entry
 ; P9BE-AIX32-NEXT:    lwzux 4, 3, 4
-; P9BE-AIX32-NEXT:    xxlxor 2, 2, 2
 ; P9BE-AIX32-NEXT:    vspltisw 4, 8
-; P9BE-AIX32-NEXT:    stw 4, -48(1)
 ; P9BE-AIX32-NEXT:    vadduwm 4, 4, 4
+; P9BE-AIX32-NEXT:    stw 4, -48(1)
 ; P9BE-AIX32-NEXT:    lwz 4, 4(3)
 ; P9BE-AIX32-NEXT:    lxv 0, -48(1)
 ; P9BE-AIX32-NEXT:    stw 4, -32(1)
 ; P9BE-AIX32-NEXT:    lwz 4, L..C0(2) # %const.0
-; P9BE-AIX32-NEXT:    lxv 1, -32(1)
 ; P9BE-AIX32-NEXT:    lwz 3, 8(3)
+; P9BE-AIX32-NEXT:    lxv 1, -32(1)
 ; P9BE-AIX32-NEXT:    stw 3, -16(1)
-; P9BE-AIX32-NEXT:    lwz 3, L..C1(2) # %const.1
+; P9BE-AIX32-NEXT:    lxv 2, 0(4)
+; P9BE-AIX32-NEXT:    lxv 3, -16(1)
 ; P9BE-AIX32-NEXT:    xxmrghw 2, 0, 1
-; P9BE-AIX32-NEXT:    lxv 0, 0(4)
-; P9BE-AIX32-NEXT:    xxperm 2, 2, 0
-; P9BE-AIX32-NEXT:    lxv 0, -16(1)
-; P9BE-AIX32-NEXT:    xxmrghw 3, 1, 0
-; P9BE-AIX32-NEXT:    lxv 0, 0(3)
-; P9BE-AIX32-NEXT:    xxperm 3, 3, 0
+; P9BE-AIX32-NEXT:    xxlxor 0, 0, 0
+; P9BE-AIX32-NEXT:    xxperm 2, 0, 2
+; P9BE-AIX32-NEXT:    xxmrghw 3, 1, 3
+; P9BE-AIX32-NEXT:    xxperm 3, 0, 2
 ; P9BE-AIX32-NEXT:    vnegw 3, 3
 ; P9BE-AIX32-NEXT:    vslw 3, 3, 4
 ; P9BE-AIX32-NEXT:    vsubuwm 2, 3, 2
@@ -180,7 +170,7 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9BE-AIX:       # %bb.0: # %entry
 ; P9BE-AIX-NEXT:    add 5, 3, 4
 ; P9BE-AIX-NEXT:    lxsiwzx 2, 3, 4
-; P9BE-AIX-NEXT:    ld 3, L..C2(2) # %const.0
+; P9BE-AIX-NEXT:    ld 3, L..C1(2) # %const.0
 ; P9BE-AIX-NEXT:    xxlxor 0, 0, 0
 ; P9BE-AIX-NEXT:    vspltisw 4, 8
 ; P9BE-AIX-NEXT:    lxv 1, 0(3)
@@ -200,7 +190,7 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9BE-AIX32:       # %bb.0: # %entry
 ; P9BE-AIX32-NEXT:    add 5, 3, 4
 ; P9BE-AIX32-NEXT:    lxsiwzx 2, 3, 4
-; P9BE-AIX32-NEXT:    lwz 3, L..C2(2) # %const.0
+; P9BE-AIX32-NEXT:    lwz 3, L..C1(2) # %const.0
 ; P9BE-AIX32-NEXT:    xxlxor 0, 0, 0
 ; P9BE-AIX32-NEXT:    vspltisw 4, 8
 ; P9BE-AIX32-NEXT:    lxv 1, 0(3)
@@ -297,9 +287,9 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
 ; P9BE-AIX-NEXT:    li 7, 16
 ; P9BE-AIX-NEXT:    add 6, 3, 4
 ; P9BE-AIX-NEXT:    lxsihzx 1, 3, 4
-; P9BE-AIX-NEXT:    ld 3, L..C3(2) # %const.1
+; P9BE-AIX-NEXT:    ld 3, L..C2(2) # %const.1
 ; P9BE-AIX-NEXT:    lxsihzx 2, 6, 7
-; P9BE-AIX-NEXT:    ld 6, L..C4(2) # %const.0
+; P9BE-AIX-NEXT:    ld 6, L..C3(2) # %const.0
 ; P9BE-AIX-NEXT:    lxv 0, 0(6)
 ; P9BE-AIX-NEXT:    li 6, 0
 ; P9BE-AIX-NEXT:    mtvsrwz 3, 6
@@ -328,7 +318,7 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
 ; P9BE-AIX32-NEXT:    sth 4, -48(1)
 ; P9BE-AIX32-NEXT:    lxv 4, -48(1)
 ; P9BE-AIX32-NEXT:    sth 3, -32(1)
-; P9BE-AIX32-NEXT:    lwz 3, L..C3(2) # %const.0
+; P9BE-AIX32-NEXT:    lwz 3, L..C2(2) # %const.0
 ; P9BE-AIX32-NEXT:    lxv 3, -32(1)
 ; P9BE-AIX32-NEXT:    vmrghh 4, 2, 4
 ; P9BE-AIX32-NEXT:    lxv 0, 0(3)
@@ -437,9 +427,9 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
 ; P9BE-AIX-NEXT:    add 6, 3, 4
 ; P9BE-AIX-NEXT:    li 7, 8
 ; P9BE-AIX-NEXT:    lxsibzx 3, 3, 4
-; P9BE-AIX-NEXT:    ld 3, L..C5(2) # %const.1
+; P9BE-AIX-NEXT:    ld 3, L..C4(2) # %const.1
 ; P9BE-AIX-NEXT:    lxsibzx 0, 6, 7
-; P9BE-AIX-NEXT:    ld 6, L..C6(2) # %const.0
+; P9BE-AIX-NEXT:    ld 6, L..C5(2) # %const.0
 ; P9BE-AIX-NEXT:    lxv 1, 0(6)
 ; P9BE-AIX-NEXT:    li 6, 0
 ; P9BE-AIX-NEXT:    mtvsrwz 2, 6
@@ -464,9 +454,9 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
 ; P9BE-AIX32-NEXT:    add 6, 3, 4
 ; P9BE-AIX32-NEXT:    li 7, 8
 ; P9BE-AIX32-NEXT:    lxsibzx 3, 3, 4
-; P9BE-AIX32-NEXT:    lwz 3, L..C4(2) # %const.1
+; P9BE-AIX32-NEXT:    lwz 3, L..C3(2) # %const.1
 ; P9BE-AIX32-NEXT:    lxsibzx 0, 6, 7
-; P9BE-AIX32-NEXT:    lwz 6, L..C5(2) # %const.0
+; P9BE-AIX32-NEXT:    lwz 6, L..C4(2) # %const.0
 ; P9BE-AIX32-NEXT:    lxv 1, 0(6)
 ; P9BE-AIX32-NEXT:    li 6, 0
 ; P9BE-AIX32-NEXT:    mtvsrwz 2, 6
diff --git a/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll b/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll
index 690828c7794346e..6e4f624415d9983 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll
@@ -20,6 +20,7 @@ define i64 @complex_float_add(i64 %a.coerce, i64 %b.coerce) nounwind {
 ; CHECK-NEXT:    mv a0, s0
 ; CHECK-NEXT:    mv a1, s1
 ; CHECK-NEXT:    call __addsf3 at plt
+; CHECK-NEXT:    andi a0, a0, -1
 ; CHECK-NEXT:    slli a0, a0, 32
 ; CHECK-NEXT:    slli s2, s2, 32
 ; CHECK-NEXT:    srli a1, s2, 32
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr61561.ll b/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
index f27edd36116657e..8d246d99388193f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr61561.ll
@@ -5,12 +5,11 @@ define <vscale x 4 x i8> @foo(ptr %p) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl1re16.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vsll.vi v8, v8, 3
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vsll.vi v8, v10, 3
 ; CHECK-NEXT:    li a0, 248
-; CHECK-NEXT:    vand.vx v8, v10, a0
+; CHECK-NEXT:    vand.vx v8, v8, a0
 ; CHECK-NEXT:    lui a0, 4
 ; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    lui a0, 1
diff --git a/llvm/test/CodeGen/RISCV/split-store.ll b/llvm/test/CodeGen/RISCV/split-store.ll
index 367d3fe2c595fea..afc72d2b8ab7016 100644
--- a/llvm/test/CodeGen/RISCV/split-store.ll
+++ b/llvm/test/CodeGen/RISCV/split-store.ll
@@ -129,6 +129,7 @@ define void @int32_int32_pair(i32 %tmp1, i32 %tmp2, ptr %ref.tmp) {
 ;
 ; RV64-LABEL: int32_int32_pair:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    andi a1, a1, -1
 ; RV64-NEXT:    slli a1, a1, 32
 ; RV64-NEXT:    slli a0, a0, 32
 ; RV64-NEXT:    srli a0, a0, 32
@@ -138,6 +139,7 @@ define void @int32_int32_pair(i32 %tmp1, i32 %tmp2, ptr %ref.tmp) {
 ;
 ; RV64D-LABEL: int32_int32_pair:
 ; RV64D:       # %bb.0:
+; RV64D-NEXT:    andi a1, a1, -1
 ; RV64D-NEXT:    slli a1, a1, 32
 ; RV64D-NEXT:    slli a0, a0, 32
 ; RV64D-NEXT:    srli a0, a0, 32
diff --git a/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll b/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
index fdfbf3393098e4a..40e1398b6f10fe1 100644
--- a/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
+++ b/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
@@ -20,18 +20,19 @@ define void @arm_q15_to_q31(ptr nocapture noundef readonly %pSrc, ptr nocapture
 ; CHECK-NEXT:  @ %bb.2: @ %while.body.prol
 ; CHECK-NEXT:    str r2, [sp] @ 4-byte Spill
 ; CHECK-NEXT:    str r7, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    ldrh r2, [r0]
-; CHECK-NEXT:    ldrh r7, [r0, #2]
-; CHECK-NEXT:    ldrh r4, [r0, #4]
-; CHECK-NEXT:    ldrh r6, [r0, #6]
+; CHECK-NEXT:    movs r6, #2
+; CHECK-NEXT:    ldrsh r6, [r0, r6]
+; CHECK-NEXT:    movs r7, #6
+; CHECK-NEXT:    ldrsh r7, [r0, r7]
+; CHECK-NEXT:    lsls r2, r7, #16
+; CHECK-NEXT:    ldrh r4, [r0]
+; CHECK-NEXT:    ldrh r7, [r0, #4]
+; CHECK-NEXT:    lsls r7, r7, #16
 ; CHECK-NEXT:    lsls r6, r6, #16
 ; CHECK-NEXT:    lsls r4, r4, #16
-; CHECK-NEXT:    lsls r7, r7, #16
-; CHECK-NEXT:    lsls r2, r2, #16
-; CHECK-NEXT:    stm r1!, {r2, r7}
-; CHECK-NEXT:    str r4, [r1]
-; CHECK-NEXT:    str r6, [r1, #4]
-; CHECK-NEXT:    subs r1, #8
+; CHECK-NEXT:    stm r1!, {r4, r6, r7}
+; CHECK-NEXT:    str r2, [r1]
+; CHECK-NEXT:    subs r1, #12
 ; CHECK-NEXT:    cmp r5, #1
 ; CHECK-NEXT:    bne .LBB0_11
 ; CHECK-NEXT:  @ %bb.3:
@@ -45,53 +46,61 @@ define void @arm_q15_to_q31(ptr nocapture noundef readonly %pSrc, ptr nocapture
 ; CHECK-NEXT:    blo .LBB0_6
 ; CHECK-NEXT:  .LBB0_5: @ %while.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldrh r2, [r0]
-; CHECK-NEXT:    ldrh r4, [r0, #2]
-; CHECK-NEXT:    ldrh r5, [r0, #4]
-; CHECK-NEXT:    ldrh r6, [r0, #6]
-; CHECK-NEXT:    lsls r6, r6, #16
-; CHECK-NEXT:    str r6, [r1, #12]
-; CHECK-NEXT:    lsls r5, r5, #16
-; CHECK-NEXT:    str r5, [r1, #8]
+; CHECK-NEXT:    movs r2, #2
+; CHECK-NEXT:    ldrsh r2, [r0, r2]
+; CHECK-NEXT:    movs r4, #6
+; CHECK-NEXT:    ldrsh r4, [r0, r4]
 ; CHECK-NEXT:    lsls r4, r4, #16
-; CHECK-NEXT:    str r4, [r1, #4]
+; CHECK-NEXT:    ldrh r5, [r0]
+; CHECK-NEXT:    ldrh r6, [r0, #4]
+; CHECK-NEXT:    str r4, [r1, #12]
+; CHECK-NEXT:    lsls r4, r6, #16
+; CHECK-NEXT:    str r4, [r1, #8]
 ; CHECK-NEXT:    lsls r2, r2, #16
+; CHECK-NEXT:    str r2, [r1, #4]
+; CHECK-NEXT:    lsls r2, r5, #16
 ; CHECK-NEXT:    str r2, [r1]
-; CHECK-NEXT:    ldrh r2, [r0, #8]
-; CHECK-NEXT:    ldrh r4, [r0, #10]
-; CHECK-NEXT:    ldrh r5, [r0, #12]
-; CHECK-NEXT:    ldrh r6, [r0, #14]
-; CHECK-NEXT:    lsls r6, r6, #16
-; CHECK-NEXT:    str r6, [r1, #28]
-; CHECK-NEXT:    lsls r5, r5, #16
-; CHECK-NEXT:    str r5, [r1, #24]
+; CHECK-NEXT:    movs r2, #10
+; CHECK-NEXT:    ldrsh r2, [r0, r2]
+; CHECK-NEXT:    movs r4, #14
+; CHECK-NEXT:    ldrsh r4, [r0, r4]
 ; CHECK-NEXT:    lsls r4, r4, #16
-; CHECK-NEXT:    str r4, [r1, #20]
+; CHECK-NEXT:    ldrh r5, [r0, #8]
+; CHECK-NEXT:    ldrh r6, [r0, #12]
+; CHECK-NEXT:    str r4, [r1, #28]
+; CHECK-NEXT:    lsls r4, r6, #16
+; CHECK-NEXT:    str r4, [r1, #24]
 ; CHECK-NEXT:    lsls r2, r2, #16
+; CHECK-NEXT:    str r2, [r1, #20]
+; CHECK-NEXT:    lsls r2, r5, #16
 ; CHECK-NEXT:    str r2, [r1, #16]
-; CHECK-NEXT:    ldrh r2, [r0, #16]
-; CHECK-NEXT:    ldrh r4, [r0, #18]
-; CHECK-NEXT:    ldrh r5, [r0, #20]
-; CHECK-NEXT:    ldrh r6, [r0, #22]
-; CHECK-NEXT:    lsls r6, r6, #16
-; CHECK-NEXT:    str r6, [r1, #44]
-; CHECK-NEXT:    lsls r5, r5, #16
-; CHECK-NEXT:    str r5, [r1, #40]
+; CHECK-NEXT:    movs r2, #18
+; CHECK-NEXT:    ldrsh r2, [r0, r2]
+; CHECK-NEXT:    movs r4, #22
+; CHECK-NEXT:    ldrsh r4, [r0, r4]
 ; CHECK-NEXT:    lsls r4, r4, #16
-; CHECK-NEXT:    str r4, [r1, #36]
+; CHECK-NEXT:    ldrh r5, [r0, #16]
+; CHECK-NEXT:    ldrh r6, [r0, #20]
+; CHECK-NEXT:    str r4, [r1, #44]
+; CHECK-NEXT:    lsls r4, r6, #16
+; CHECK-NEXT:    str r4, [r1, #40]
 ; CHECK-NEXT:    lsls r2, r2, #16
+; CHECK-NEXT:    str r2, [r1, #36]
+; CHECK-NEXT:    lsls r2, r5, #16
 ; CHECK-NEXT:    str r2, [r1, #32]
-; CHECK-NEXT:    ldrh r2, [r0, #24]
-; CHECK-NEXT:    ldrh r4, [r0, #26]
-; CHECK-NEXT:    ldrh r5, [r0, #28]
-; CHECK-NEXT:    ldrh r6, [r0, #30]
-; CHECK-NEXT:    lsls r6, r6, #16
-; CHECK-NEXT:    str r6, [r1, #60]
-; CHECK-NEXT:    lsls r5, r5, #16
-; CHECK-NEXT:    str r5, [r1, #56]
+; CHECK-NEXT:    movs r2, #26
+; CHECK-NEXT:    ldrsh r2, [r0, r2]
+; CHECK-NEXT:    movs r4, #30
+; CHECK-NEXT:    ldrsh r4, [r0, r4]
 ; CHECK-NEXT:    lsls r4, r4, #16
-; CHECK-NEXT:    str r4, [r1, #52]
+; CHECK-NEXT:    ldr...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/70041


More information about the llvm-commits mailing list