[llvm] [DAGCombiner] Set shift flags during visit. (PR #91239)
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 6 10:04:57 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-systemz
@llvm/pr-subscribers-backend-x86
Author: None (goldsteinn)
<details>
<summary>Changes</summary>
- **[CodeGen] Regen some old tests; NFC**
- **[DAGCombiner] Set shift flags during visit.**
---
Patch is 6.14 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91239.diff
393 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+69-1)
- (modified) llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/aarch64-address-type-promotion.ll (+12-6)
- (modified) llvm/test/CodeGen/AArch64/addsub.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/arm64-narrow-st-merge.ll (+206)
- (modified) llvm/test/CodeGen/AArch64/arm64-rev.ll (+28-17)
- (modified) llvm/test/CodeGen/AArch64/arm64-trunc-store.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/bswap-known-bits.ll (+5-5)
- (modified) llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll (+53-29)
- (modified) llvm/test/CodeGen/AArch64/extract-subvec-combine.ll (+6-4)
- (modified) llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll (+2-1)
- (modified) llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/pull-binop-through-shift.ll (+14-14)
- (modified) llvm/test/CodeGen/AArch64/sadd_sat_vec.ll (+16-12)
- (modified) llvm/test/CodeGen/AArch64/select_const.ll (+12-9)
- (modified) llvm/test/CodeGen/AArch64/shift-logic.ll (+6-4)
- (modified) llvm/test/CodeGen/AArch64/shift-mod.ll (+8-6)
- (modified) llvm/test/CodeGen/AArch64/signed-truncation-check.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/srem-pow2.ll (+12-10)
- (modified) llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll (+8-6)
- (modified) llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll (+8-7)
- (modified) llvm/test/CodeGen/AArch64/sshl_sat.ll (+10-9)
- (modified) llvm/test/CodeGen/AArch64/ssub_sat_vec.ll (+16-12)
- (modified) llvm/test/CodeGen/AArch64/storepairsuppress.ll (+8-8)
- (modified) llvm/test/CodeGen/AArch64/tbl-loops.ll (+6-4)
- (modified) llvm/test/CodeGen/AArch64/ushl_sat.ll (+7-4)
- (modified) llvm/test/CodeGen/AArch64/vector_splat-const-shift-of-constmasked.ll (+62-53)
- (modified) llvm/test/CodeGen/AArch64/win64_vararg.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/add.v2i16.ll (+6-3)
- (modified) llvm/test/CodeGen/AMDGPU/anyext.ll (+2)
- (modified) llvm/test/CodeGen/AMDGPU/bf16-conversions.ll (+51-40)
- (modified) llvm/test/CodeGen/AMDGPU/bf16.ll (+15556-10734)
- (modified) llvm/test/CodeGen/AMDGPU/bfi_nested.ll (+2-1)
- (modified) llvm/test/CodeGen/AMDGPU/bswap.ll (+1)
- (modified) llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll (+4)
- (modified) llvm/test/CodeGen/AMDGPU/build_vector.ll (+386)
- (modified) llvm/test/CodeGen/AMDGPU/bypass-div.ll (+470-434)
- (modified) llvm/test/CodeGen/AMDGPU/calling-conventions.ll (+2-1)
- (modified) llvm/test/CodeGen/AMDGPU/carryout-selection.ll (+565-520)
- (modified) llvm/test/CodeGen/AMDGPU/combine-vload-extract.ll (+10-4)
- (modified) llvm/test/CodeGen/AMDGPU/commute-shifts.ll (+4)
- (modified) llvm/test/CodeGen/AMDGPU/computeNumSignBits-mul.ll (+2-1)
- (modified) llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll (+42-34)
- (modified) llvm/test/CodeGen/AMDGPU/ctpop16.ll (+172-132)
- (modified) llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll (+65-52)
- (modified) llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll (+6-3)
- (modified) llvm/test/CodeGen/AMDGPU/divergence-driven-sext-inreg.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/ds_read2.ll (+11-6)
- (modified) llvm/test/CodeGen/AMDGPU/extract-load-i1.ll (+14-14)
- (modified) llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll (+230-194)
- (modified) llvm/test/CodeGen/AMDGPU/fabs.f16.ll (+8-2)
- (modified) llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll (+8)
- (modified) llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll (+2-1)
- (modified) llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll (+4)
- (modified) llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll (+54-18)
- (modified) llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll (+8-7)
- (modified) llvm/test/CodeGen/AMDGPU/fneg.f16.ll (+43-13)
- (modified) llvm/test/CodeGen/AMDGPU/fneg.ll (+657)
- (modified) llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll (+20-16)
- (modified) llvm/test/CodeGen/AMDGPU/fp_to_sint.ll (+157-153)
- (modified) llvm/test/CodeGen/AMDGPU/fp_to_uint.ll (+157-153)
- (modified) llvm/test/CodeGen/AMDGPU/fp_trunc_store_fp64_to_bf16.ll (+355-287)
- (modified) llvm/test/CodeGen/AMDGPU/fshr.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/function-returns.ll (+139-78)
- (modified) llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll (+192-172)
- (modified) llvm/test/CodeGen/AMDGPU/global-saddr-load.ll (+106-36)
- (modified) llvm/test/CodeGen/AMDGPU/idot4s.ll (+27-23)
- (modified) llvm/test/CodeGen/AMDGPU/idot4u.ll (+14-12)
- (modified) llvm/test/CodeGen/AMDGPU/idot8s.ll (+878-375)
- (modified) llvm/test/CodeGen/AMDGPU/idot8u.ll (+63-43)
- (modified) llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll (+25-20)
- (modified) llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll (+44-13)
- (modified) llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll (+219-126)
- (modified) llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll (+329-305)
- (modified) llvm/test/CodeGen/AMDGPU/kernel-args.ll (+334-262)
- (modified) llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll (+332)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll (+55-3)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll (+58)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll (-32)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll (-32)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll (+3)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll (+156)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.ll (+156)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.exp.ll (+10-7)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.exp10.ll (+10-7)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.exp2.ll (+4-1)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.log.ll (+10-7)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.log10.ll (+10-7)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.log2.ll (+4-1)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.mulo.ll (+184-140)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll (+355)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll (+103-43)
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i1.ll (+2889-2607)
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i16.ll (+1402-930)
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i8.ll (+1494-1222)
- (modified) llvm/test/CodeGen/AMDGPU/load-global-i16.ll (+2772-1841)
- (modified) llvm/test/CodeGen/AMDGPU/load-lo16.ll (+42-21)
- (modified) llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll (+18-5)
- (modified) llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll (+166-138)
- (modified) llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll (+7-4)
- (modified) llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll (+12-6)
- (modified) llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll (+52-84)
- (modified) llvm/test/CodeGen/AMDGPU/max.ll (+8-4)
- (modified) llvm/test/CodeGen/AMDGPU/memory_clause.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/min.ll (+55-34)
- (modified) llvm/test/CodeGen/AMDGPU/mul_int24.ll (+18-6)
- (modified) llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll (+147-94)
- (modified) llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll (+44-33)
- (modified) llvm/test/CodeGen/AMDGPU/permute_i8.ll (+110-119)
- (modified) llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll (+60-51)
- (modified) llvm/test/CodeGen/AMDGPU/rem_i128.ll (+66-45)
- (modified) llvm/test/CodeGen/AMDGPU/saddsat.ll (+6-3)
- (modified) llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll (+28-20)
- (modified) llvm/test/CodeGen/AMDGPU/scratch-simple.ll (+7184)
- (modified) llvm/test/CodeGen/AMDGPU/sdiv.ll (+10-2)
- (modified) llvm/test/CodeGen/AMDGPU/sdiv64.ll (+391-366)
- (modified) llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll (+209-144)
- (modified) llvm/test/CodeGen/AMDGPU/sext-in-reg.ll (+2402)
- (modified) llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll (+428)
- (modified) llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll (+31)
- (modified) llvm/test/CodeGen/AMDGPU/shl.ll (+35-29)
- (modified) llvm/test/CodeGen/AMDGPU/shl.v2i16.ll (+2-6)
- (modified) llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll (+39)
- (modified) llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll (+91-43)
- (modified) llvm/test/CodeGen/AMDGPU/sra.ll (+43-29)
- (modified) llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll (+7-5)
- (modified) llvm/test/CodeGen/AMDGPU/srem64.ll (+440-407)
- (modified) llvm/test/CodeGen/AMDGPU/ssubsat.ll (+6-3)
- (modified) llvm/test/CodeGen/AMDGPU/store-private.ll (+1874)
- (modified) llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll (+2-1)
- (modified) llvm/test/CodeGen/AMDGPU/sub.v2i16.ll (+2-1)
- (modified) llvm/test/CodeGen/AMDGPU/trunc-combine.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/uaddsat.ll (+1)
- (modified) llvm/test/CodeGen/AMDGPU/udiv.ll (+206-181)
- (modified) llvm/test/CodeGen/AMDGPU/udiv64.ll (+418-380)
- (modified) llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll (+79-77)
- (modified) llvm/test/CodeGen/AMDGPU/urem64.ll (+266-239)
- (modified) llvm/test/CodeGen/AMDGPU/usubsat.ll (+3-2)
- (modified) llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll (+232-199)
- (modified) llvm/test/CodeGen/AMDGPU/wave32.ll (+174-162)
- (modified) llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll (+14-11)
- (modified) llvm/test/CodeGen/ARM/Windows/alloca.ll (+3)
- (modified) llvm/test/CodeGen/ARM/Windows/vla.ll (+4)
- (modified) llvm/test/CodeGen/ARM/and-cmpz.ll (+121)
- (modified) llvm/test/CodeGen/ARM/and-load-combine.ll (+8-4)
- (modified) llvm/test/CodeGen/ARM/bfi-chain-cse-crash.ll (+10-6)
- (modified) llvm/test/CodeGen/ARM/bfi.ll (+2)
- (modified) llvm/test/CodeGen/ARM/bfx.ll (+33-17)
- (modified) llvm/test/CodeGen/ARM/combine-movc-sub.ll (+3-3)
- (modified) llvm/test/CodeGen/ARM/demanded-bits-and.ll (+1)
- (modified) llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll (+31-14)
- (modified) llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll (+35-14)
- (modified) llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll (+36-15)
- (modified) llvm/test/CodeGen/ARM/pr36577.ll (+6-5)
- (modified) llvm/test/CodeGen/ARM/sadd_sat_plus.ll (+2)
- (modified) llvm/test/CodeGen/ARM/sbfx.ll (+28-13)
- (modified) llvm/test/CodeGen/ARM/sdiv-pow2-arm-size.ll (+52-32)
- (modified) llvm/test/CodeGen/ARM/shift-combine.ll (+157-13)
- (modified) llvm/test/CodeGen/ARM/simplifysetcc_narrow_load.ll (+4-2)
- (modified) llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll (+54-40)
- (modified) llvm/test/CodeGen/ARM/ssub_sat_plus.ll (+2)
- (modified) llvm/test/CodeGen/BPF/remove_truncate_9.ll (+3)
- (modified) llvm/test/CodeGen/Hexagon/atomicrmw-uinc-udec-wrap.ll (+58-50)
- (modified) llvm/test/CodeGen/Hexagon/isel-global-offset-alignment.ll (+3-3)
- (modified) llvm/test/CodeGen/Hexagon/vect/vect-shifts.ll (+92-75)
- (modified) llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll (+18-18)
- (modified) llvm/test/CodeGen/LoongArch/bstrins_w.ll (+1)
- (modified) llvm/test/CodeGen/LoongArch/bstrpick_d.ll (+4-2)
- (modified) llvm/test/CodeGen/LoongArch/bstrpick_w.ll (+4-2)
- (modified) llvm/test/CodeGen/LoongArch/bytepick.ll (+30-5)
- (modified) llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll (+12-7)
- (modified) llvm/test/CodeGen/LoongArch/fcopysign.ll (+3)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/and.ll (+4-1)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll (+28-14)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll (+240-220)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll (+360-180)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll (+10-2)
- (modified) llvm/test/CodeGen/LoongArch/legalicmpimm.ll (+4-1)
- (modified) llvm/test/CodeGen/LoongArch/rotl-rotr.ll (+6-3)
- (modified) llvm/test/CodeGen/LoongArch/sextw-removal.ll (+54-20)
- (modified) llvm/test/CodeGen/Mips/atomic.ll (+21-7)
- (modified) llvm/test/CodeGen/Mips/cconv/illegal-vectors.ll (+272-148)
- (modified) llvm/test/CodeGen/Mips/cins.ll (+34-16)
- (modified) llvm/test/CodeGen/Mips/dins.ll (+19-9)
- (modified) llvm/test/CodeGen/Mips/fabs.ll (+11-6)
- (modified) llvm/test/CodeGen/Mips/fcopysign-f32-f64.ll (+76-28)
- (modified) llvm/test/CodeGen/Mips/fcopysign.ll (+88-31)
- (modified) llvm/test/CodeGen/Mips/funnel-shift-rot.ll (+4-4)
- (modified) llvm/test/CodeGen/Mips/funnel-shift.ll (+4-4)
- (modified) llvm/test/CodeGen/Mips/llvm-ir/abs.ll (+107-12)
- (modified) llvm/test/CodeGen/Mips/llvm-ir/nan-fp-attr.ll (+13-18)
- (modified) llvm/test/CodeGen/Mips/load-store-left-right.ll (+37-41)
- (modified) llvm/test/CodeGen/Mips/mips64-f128.ll (+10-7)
- (modified) llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll (+18-14)
- (modified) llvm/test/CodeGen/Mips/unalignedload.ll (+6-6)
- (modified) llvm/test/CodeGen/NVPTX/atomics-sm70.ll (+4-4)
- (modified) llvm/test/CodeGen/NVPTX/i8x4-instructions.ll (+12-9)
- (modified) llvm/test/CodeGen/NVPTX/lower-byval-args.ll (+217)
- (modified) llvm/test/CodeGen/NVPTX/mulwide.ll (+4-20)
- (modified) llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll (+3)
- (modified) llvm/test/CodeGen/PowerPC/coalesce-ext.ll (+10-4)
- (modified) llvm/test/CodeGen/PowerPC/extsh.ll (+1)
- (modified) llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll (+140-141)
- (modified) llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll (+16-13)
- (modified) llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll (+26-12)
- (modified) llvm/test/CodeGen/PowerPC/pr38087.ll (+4-1)
- (modified) llvm/test/CodeGen/PowerPC/pr45432.ll (+2-2)
- (modified) llvm/test/CodeGen/PowerPC/rlwinm.ll (+2-1)
- (modified) llvm/test/CodeGen/PowerPC/sext-vector-inreg.ll (+4-1)
- (modified) llvm/test/CodeGen/PowerPC/shl_sext.ll (+1)
- (modified) llvm/test/CodeGen/PowerPC/sms-phi-3.ll (+15-27)
- (modified) llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll (+2-2)
- (modified) llvm/test/CodeGen/PowerPC/vec-itofp.ll (+27-11)
- (modified) llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll (+31-18)
- (modified) llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll (+61-34)
- (modified) llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll (+20-7)
- (modified) llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll (+58-31)
- (modified) llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll (+9-2)
- (modified) llvm/test/CodeGen/RISCV/add-before-shl.ll (+16-20)
- (modified) llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll (+18-6)
- (modified) llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll (+128)
- (modified) llvm/test/CodeGen/RISCV/atomic-rmw.ll (+1682-1104)
- (modified) llvm/test/CodeGen/RISCV/atomic-signext.ll (+136-92)
- (modified) llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll (+28-28)
- (modified) llvm/test/CodeGen/RISCV/bittest.ll (+6-8)
- (modified) llvm/test/CodeGen/RISCV/bswap-bitreverse.ll (+26-22)
- (modified) llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll (+8-4)
- (modified) llvm/test/CodeGen/RISCV/div-by-constant.ll (+72-43)
- (modified) llvm/test/CodeGen/RISCV/div.ll (+15-9)
- (modified) llvm/test/CodeGen/RISCV/float-intrinsics.ll (+8-4)
- (modified) llvm/test/CodeGen/RISCV/pr65025.ll (+2-1)
- (modified) llvm/test/CodeGen/RISCV/rem.ll (+16-12)
- (modified) llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll (+19-11)
- (modified) llvm/test/CodeGen/RISCV/rotl-rotr.ll (+4-2)
- (modified) llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll (+6-3)
- (modified) llvm/test/CodeGen/RISCV/rv64-legal-i32/rem.ll (+8-6)
- (modified) llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll (+24-25)
- (modified) llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll (+16-11)
- (modified) llvm/test/CodeGen/RISCV/rv64i-tricky-shifts.ll (+4-2)
- (modified) llvm/test/CodeGen/RISCV/rv64xtheadbb.ll (+14-9)
- (modified) llvm/test/CodeGen/RISCV/rv64zba.ll (+25-26)
- (modified) llvm/test/CodeGen/RISCV/rv64zbb.ll (+18-9)
- (modified) llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll (+5-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll (+78)
- (modified) llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll (+78)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll (+28-12)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll (+635-237)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll (-26)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll (+57-22)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll (+20-20)
- (modified) llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll (+10-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll (+4-3)
- (modified) llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll (+4-3)
- (modified) llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i64.ll (+10-5)
- (modified) llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll (+20-18)
- (modified) llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll (+939-350)
- (modified) llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll (+981-357)
- (modified) llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll (+9-13)
- (modified) llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll (+32-16)
- (modified) llvm/test/CodeGen/RISCV/shifts.ll (+60-60)
- (modified) llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll (+55-55)
- (modified) llvm/test/CodeGen/RISCV/split-urem-by-constant.ll (+41-41)
- (modified) llvm/test/CodeGen/RISCV/srem-lkk.ll (+16-12)
- (modified) llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll (+120-106)
- (modified) llvm/test/CodeGen/RISCV/srem-vector-lkk.ll (+34-22)
- (modified) llvm/test/CodeGen/RISCV/unaligned-load-store.ll (+46-32)
- (modified) llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll (+4-2)
- (modified) llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll (+1304-950)
- (modified) llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll (+1636-1467)
- (modified) llvm/test/CodeGen/SystemZ/int-abs-01.ll (+53-24)
- (modified) llvm/test/CodeGen/SystemZ/int-cmp-44.ll (+306-161)
- (modified) llvm/test/CodeGen/SystemZ/int-mul-10.ll (+3-38)
- (modified) llvm/test/CodeGen/SystemZ/int-neg-02.ll (+92-34)
- (modified) llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll (+24-7)
- (modified) llvm/test/CodeGen/Thumb/shift-and.ll (+15-10)
- (modified) llvm/test/CodeGen/Thumb/srem-seteq-illegal-types.ll (+12-7)
- (modified) llvm/test/CodeGen/Thumb/umul_fix_sat.ll (+6-2)
- (modified) llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-disabled-in-loloops.ll (+3-2)
- (modified) llvm/test/CodeGen/Thumb2/bfx.ll (+14-6)
- (modified) llvm/test/CodeGen/Thumb2/mve-float16regloops.ll (+2-2)
- (modified) llvm/test/CodeGen/Thumb2/mve-float32regloops.ll (+3-3)
- (modified) llvm/test/CodeGen/Thumb2/mve-gather-increment.ll (+153-141)
- (modified) llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll (+12-10)
- (modified) llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll (+15-13)
- (modified) llvm/test/CodeGen/Thumb2/shift_parts.ll (+10-4)
- (modified) llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll (+9-6)
- (modified) llvm/test/CodeGen/VE/Scalar/bitreverse.ll (+11-2)
- (modified) llvm/test/CodeGen/WebAssembly/conv.ll (+3)
- (modified) llvm/test/CodeGen/WebAssembly/pr47375.ll (+5-3)
- (modified) llvm/test/CodeGen/WebAssembly/simd-bitmask.ll (+13-13)
- (modified) llvm/test/CodeGen/WebAssembly/simd-pr61780.ll (+2-2)
- (modified) llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll (+5)
- (modified) llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll (+319-319)
- (modified) llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll (+16-5)
- (modified) llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/add-ext.ll (+2-1)
- (modified) llvm/test/CodeGen/X86/addr-mode-matcher-2.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/addr-mode-matcher-3.ll (+5-4)
- (modified) llvm/test/CodeGen/X86/atomic-bit-test.ll (+19-16)
- (modified) llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll (+12-3)
- (modified) llvm/test/CodeGen/X86/atomic-rm-bit-test.ll (+36-36)
- (modified) llvm/test/CodeGen/X86/avx512-calling-conv.ll (+217-209)
- (modified) llvm/test/CodeGen/X86/bfloat.ll (+59-53)
- (modified) llvm/test/CodeGen/X86/bitreverse.ll (+43-36)
- (modified) llvm/test/CodeGen/X86/bool-math.ll (+5-3)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 05ab6e2e48206f..a182fbdde4f17b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9738,6 +9738,64 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
return SDValue();
}
+static SDValue setShiftFlags(SelectionDAG &DAG, const SDLoc &DL, SDNode *N) {
+ unsigned Opc = N->getOpcode();
+ assert((Opc == ISD::SHL || Opc == ISD::SRA || Opc == ISD::SRL) &&
+ "Unknown shift opcode");
+ SDNodeFlags Flags = N->getFlags();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ // Check if we already have the flags.
+ if (Opc == ISD::SHL) {
+ if (Flags.hasNoSignedWrap() && Flags.hasNoUnsignedWrap())
+ return SDValue();
+
+ } else {
+ if (Flags.hasExact())
+ return SDValue();
+
+ // shr (shl X, Y), Y
+ if (sd_match(N0, m_Shl(m_Value(), m_Specific(N1)))) {
+ Flags.setExact(true);
+ return DAG.getNode(Opc, DL, VT, N0, N1, Flags);
+ }
+ }
+
+ // Compute what we know about shift count.
+ KnownBits KnownCnt = DAG.computeKnownBits(N1);
+ // Compute what we know about shift amt.
+ KnownBits KnownAmt = DAG.computeKnownBits(N0);
+ APInt MaxCnt = KnownCnt.getMaxValue();
+ bool Changed = false;
+ if (Opc == ISD::SHL) {
+ // If we have as many leading zeros as the maximum shift cnt we have nuw.
+ if (!Flags.hasNoUnsignedWrap() &&
+ MaxCnt.ule(KnownAmt.countMinLeadingZeros())) {
+ Flags.setNoUnsignedWrap(true);
+ Changed = true;
+ }
+ // If we have more sign bits than maximum shift cnt we have nsw.
+ if (!Flags.hasNoSignedWrap()) {
+ if (MaxCnt.ult(KnownAmt.countMinSignBits()) ||
+ MaxCnt.ult(DAG.ComputeNumSignBits(N0))) {
+ Flags.setNoSignedWrap(true);
+ Changed = true;
+ }
+ }
+ } else {
+ // If we have at least as many trailing zeros as maximum count then we have
+ // exact.
+ Changed = MaxCnt.ule(KnownAmt.countMinTrailingZeros());
+ Flags.setExact(Changed);
+ }
+
+ if (Changed)
+ return DAG.getNode(Opc, DL, VT, N0, N1, Flags);
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -9745,6 +9803,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return V;
SDLoc DL(N);
+ if (SDValue V = setShiftFlags(DAG, DL, N))
+ return V;
+
EVT VT = N0.getValueType();
EVT ShiftVT = N1.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -9895,7 +9956,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
LHSC.getZExtValue() <= RHSC.getZExtValue();
};
-
+
// fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
// fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
if (N0->getFlags().hasExact()) {
@@ -10188,6 +10249,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
return V;
SDLoc DL(N);
+ if (SDValue V = setShiftFlags(DAG, DL, N))
+ return V;
+
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -10389,6 +10453,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return V;
SDLoc DL(N);
+ if (SDValue V = setShiftFlags(DAG, DL, N))
+ return V;
EVT VT = N0.getValueType();
EVT ShiftVT = N1.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -10638,6 +10704,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return SDValue();
}
+
+
SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
diff --git a/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll b/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll
index 71f4da2b465c13..26f41f4d98c5cc 100644
--- a/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll
+++ b/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll
@@ -7,8 +7,8 @@
define <vscale x 4 x i32> @sext_inreg(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sext_inreg:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sxth z0.s, p0/m, z0.s
+; CHECK-NEXT: lsl z0.s, z0.s, #16
+; CHECK-NEXT: asr z0.s, z0.s, #16
; CHECK-NEXT: ret
%in = insertelement <vscale x 4 x i32> undef, i32 16, i32 0
%splat = shufflevector <vscale x 4 x i32> %in, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion.ll b/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion.ll
index d8280dadc550ea..da29a480959394 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -o - | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64"
@@ -5,13 +6,18 @@ target triple = "arm64-apple-macosx10.9"
; Check that sexts get promoted above adds.
define void @foo(ptr nocapture %a, i32 %i) {
+; CHECK-LABEL: foo:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: add x9, x8, #1
+; CHECK-NEXT: add x8, x8, #2
+; CHECK-NEXT: ldr w9, [x0, x9, lsl #2]
+; CHECK-NEXT: ldr w8, [x0, x8, lsl #2]
+; CHECK-NEXT: add w8, w8, w9
+; CHECK-NEXT: str w8, [x0, w1, sxtw #2]
+; CHECK-NEXT: ret
entry:
-; CHECK-LABEL: _foo:
-; CHECK: add
-; CHECK-NEXT: ldp
-; CHECK-NEXT: add
-; CHECK-NEXT: str
-; CHECK-NEXT: ret
%add = add nsw i32 %i, 1
%idxprom = sext i32 %add to i64
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom
diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll
index 20215fe9146924..fed1747c23e1c9 100644
--- a/llvm/test/CodeGen/AArch64/addsub.ll
+++ b/llvm/test/CodeGen/AArch64/addsub.ll
@@ -376,7 +376,7 @@ define i1 @uadd_add(i8 %a, i8 %b, ptr %p) {
; CHECK-NEXT: mov w8, #255 // =0xff
; CHECK-NEXT: bic w8, w8, w0
; CHECK-NEXT: add w8, w8, w1, uxtb
-; CHECK-NEXT: lsr w0, w8, #8
+; CHECK-NEXT: ubfx w0, w8, #8, #1
; CHECK-NEXT: add w8, w8, #1
; CHECK-NEXT: strb w8, [x2]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-narrow-st-merge.ll b/llvm/test/CodeGen/AArch64/arm64-narrow-st-merge.ll
index 81c3195584701c..033ac301d7abe1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-narrow-st-merge.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-narrow-st-merge.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple aarch64 -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -mtriple aarch64 -mattr=+strict-align -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-STRICT
@@ -7,6 +8,23 @@
; CHECK-STRICT: strh wzr
; CHECK-STRICT: strh wzr
define void @Strh_zero(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Strh_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: strh wzr, [x0, w1, sxtw #1]
+; CHECK-NEXT: add x8, x8, #1
+; CHECK-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Strh_zero:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: strh wzr, [x0, w1, sxtw #1]
+; CHECK-STRICT-NEXT: add x8, x8, #1
+; CHECK-STRICT-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-STRICT-NEXT: ret
entry:
%idxprom = sext i32 %n to i64
%arrayidx = getelementptr inbounds i16, ptr %P, i64 %idxprom
@@ -26,6 +44,31 @@ entry:
; CHECK-STRICT: strh wzr
; CHECK-STRICT: strh wzr
define void @Strh_zero_4(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Strh_zero_4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: strh wzr, [x0, w1, sxtw #1]
+; CHECK-NEXT: add x9, x8, #1
+; CHECK-NEXT: add x10, x8, #2
+; CHECK-NEXT: add x8, x8, #3
+; CHECK-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-NEXT: strh wzr, [x0, x10, lsl #1]
+; CHECK-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Strh_zero_4:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: strh wzr, [x0, w1, sxtw #1]
+; CHECK-STRICT-NEXT: add x9, x8, #1
+; CHECK-STRICT-NEXT: add x10, x8, #2
+; CHECK-STRICT-NEXT: add x8, x8, #3
+; CHECK-STRICT-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-STRICT-NEXT: strh wzr, [x0, x10, lsl #1]
+; CHECK-STRICT-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-STRICT-NEXT: ret
entry:
%idxprom = sext i32 %n to i64
%arrayidx = getelementptr inbounds i16, ptr %P, i64 %idxprom
@@ -50,6 +93,23 @@ entry:
; CHECK-STRICT-LABEL: Strw_zero
; CHECK-STRICT: stp wzr, wzr
define void @Strw_zero(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Strw_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: str wzr, [x0, w1, sxtw #2]
+; CHECK-NEXT: add x8, x8, #1
+; CHECK-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Strw_zero:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: str wzr, [x0, w1, sxtw #2]
+; CHECK-STRICT-NEXT: add x8, x8, #1
+; CHECK-STRICT-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-STRICT-NEXT: ret
entry:
%idxprom = sext i32 %n to i64
%arrayidx = getelementptr inbounds i32, ptr %P, i64 %idxprom
@@ -64,6 +124,23 @@ entry:
; CHECK-LABEL: Strw_zero_nonzero
; CHECK: stp wzr, w1
define void @Strw_zero_nonzero(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Strw_zero_nonzero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: str wzr, [x0, w1, sxtw #2]
+; CHECK-NEXT: add x8, x8, #1
+; CHECK-NEXT: str w1, [x0, x8, lsl #2]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Strw_zero_nonzero:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: str wzr, [x0, w1, sxtw #2]
+; CHECK-STRICT-NEXT: add x8, x8, #1
+; CHECK-STRICT-NEXT: str w1, [x0, x8, lsl #2]
+; CHECK-STRICT-NEXT: ret
entry:
%idxprom = sext i32 %n to i64
%arrayidx = getelementptr inbounds i32, ptr %P, i64 %idxprom
@@ -81,6 +158,31 @@ entry:
; CHECK-STRICT: stp wzr, wzr
; CHECK-STRICT: stp wzr, wzr
define void @Strw_zero_4(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Strw_zero_4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: str wzr, [x0, w1, sxtw #2]
+; CHECK-NEXT: add x9, x8, #1
+; CHECK-NEXT: add x10, x8, #2
+; CHECK-NEXT: add x8, x8, #3
+; CHECK-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-NEXT: str wzr, [x0, x10, lsl #2]
+; CHECK-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Strw_zero_4:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: str wzr, [x0, w1, sxtw #2]
+; CHECK-STRICT-NEXT: add x9, x8, #1
+; CHECK-STRICT-NEXT: add x10, x8, #2
+; CHECK-STRICT-NEXT: add x8, x8, #3
+; CHECK-STRICT-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-STRICT-NEXT: str wzr, [x0, x10, lsl #2]
+; CHECK-STRICT-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-STRICT-NEXT: ret
entry:
%idxprom = sext i32 %n to i64
%arrayidx = getelementptr inbounds i32, ptr %P, i64 %idxprom
@@ -106,6 +208,18 @@ entry:
; CHECK-STRICT: sturb wzr
; CHECK-STRICT: sturb wzr
define void @Sturb_zero(ptr nocapture %P, i32 %n) #0 {
+; CHECK-LABEL: Sturb_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add x8, x0, w1, sxtw
+; CHECK-NEXT: sturh wzr, [x8, #-2]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Sturb_zero:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: add x8, x0, w1, sxtw
+; CHECK-STRICT-NEXT: sturb wzr, [x8, #-2]
+; CHECK-STRICT-NEXT: sturb wzr, [x8, #-1]
+; CHECK-STRICT-NEXT: ret
entry:
%sub = add nsw i32 %n, -2
%idxprom = sext i32 %sub to i64
@@ -124,6 +238,25 @@ entry:
; CHECK-STRICT: sturh wzr
; CHECK-STRICT: sturh wzr
define void @Sturh_zero(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Sturh_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: sub x9, x8, #2
+; CHECK-NEXT: sub x8, x8, #3
+; CHECK-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Sturh_zero:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: sub x9, x8, #2
+; CHECK-STRICT-NEXT: sub x8, x8, #3
+; CHECK-STRICT-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-STRICT-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-STRICT-NEXT: ret
entry:
%sub = add nsw i32 %n, -2
%idxprom = sext i32 %sub to i64
@@ -144,6 +277,33 @@ entry:
; CHECK-STRICT: sturh wzr
; CHECK-STRICT: sturh wzr
define void @Sturh_zero_4(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Sturh_zero_4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: sub x9, x8, #3
+; CHECK-NEXT: sub x10, x8, #4
+; CHECK-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-NEXT: sub x9, x8, #2
+; CHECK-NEXT: sub x8, x8, #1
+; CHECK-NEXT: strh wzr, [x0, x10, lsl #1]
+; CHECK-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Sturh_zero_4:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: sub x9, x8, #3
+; CHECK-STRICT-NEXT: sub x10, x8, #4
+; CHECK-STRICT-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-STRICT-NEXT: sub x9, x8, #2
+; CHECK-STRICT-NEXT: sub x8, x8, #1
+; CHECK-STRICT-NEXT: strh wzr, [x0, x10, lsl #1]
+; CHECK-STRICT-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-STRICT-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-STRICT-NEXT: ret
entry:
%sub = add nsw i32 %n, -3
%idxprom = sext i32 %sub to i64
@@ -169,6 +329,25 @@ entry:
; CHECK-STRICT-LABEL: Sturw_zero
; CHECK-STRICT: stp wzr, wzr
define void @Sturw_zero(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Sturw_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: sub x9, x8, #3
+; CHECK-NEXT: sub x8, x8, #4
+; CHECK-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Sturw_zero:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: sub x9, x8, #3
+; CHECK-STRICT-NEXT: sub x8, x8, #4
+; CHECK-STRICT-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-STRICT-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-STRICT-NEXT: ret
entry:
%sub = add nsw i32 %n, -3
%idxprom = sext i32 %sub to i64
@@ -187,6 +366,33 @@ entry:
; CHECK-STRICT: stp wzr, wzr
; CHECK-STRICT: stp wzr, wzr
define void @Sturw_zero_4(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Sturw_zero_4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: sub x9, x8, #3
+; CHECK-NEXT: sub x10, x8, #4
+; CHECK-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-NEXT: sub x9, x8, #2
+; CHECK-NEXT: sub x8, x8, #1
+; CHECK-NEXT: str wzr, [x0, x10, lsl #2]
+; CHECK-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Sturw_zero_4:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: sub x9, x8, #3
+; CHECK-STRICT-NEXT: sub x10, x8, #4
+; CHECK-STRICT-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-STRICT-NEXT: sub x9, x8, #2
+; CHECK-STRICT-NEXT: sub x8, x8, #1
+; CHECK-STRICT-NEXT: str wzr, [x0, x10, lsl #2]
+; CHECK-STRICT-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-STRICT-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-STRICT-NEXT: ret
entry:
%sub = add nsw i32 %n, -3
%idxprom = sext i32 %sub to i64
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index f548a0e01feee6..19c0c8940b92b3 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -27,8 +27,8 @@ entry:
define i32 @test_rev_w_srl16(i16 %a) {
; CHECK-SD-LABEL: test_rev_w_srl16:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev w8, w0
-; CHECK-SD-NEXT: lsr w0, w8, #16
+; CHECK-SD-NEXT: and w8, w0, #0xffff
+; CHECK-SD-NEXT: rev16 w0, w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_rev_w_srl16:
@@ -45,12 +45,18 @@ entry:
}
define i32 @test_rev_w_srl16_load(ptr %a) {
-; CHECK-LABEL: test_rev_w_srl16_load:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: rev w8, w8
-; CHECK-NEXT: lsr w0, w8, #16
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_rev_w_srl16_load:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldrh w8, [x0]
+; CHECK-SD-NEXT: rev16 w0, w8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_rev_w_srl16_load:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldrh w8, [x0]
+; CHECK-GI-NEXT: rev w8, w8
+; CHECK-GI-NEXT: lsr w0, w8, #16
+; CHECK-GI-NEXT: ret
entry:
%0 = load i16, ptr %a
%1 = zext i16 %0 to i32
@@ -88,9 +94,8 @@ entry:
define i64 @test_rev_x_srl32(i32 %a) {
; CHECK-SD-LABEL: test_rev_x_srl32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-SD-NEXT: rev x8, x0
-; CHECK-SD-NEXT: lsr x0, x8, #32
+; CHECK-SD-NEXT: mov w8, w0
+; CHECK-SD-NEXT: rev32 x0, x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_rev_x_srl32:
@@ -107,12 +112,18 @@ entry:
}
define i64 @test_rev_x_srl32_load(ptr %a) {
-; CHECK-LABEL: test_rev_x_srl32_load:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: lsr x0, x8, #32
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_rev_x_srl32_load:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldr w8, [x0]
+; CHECK-SD-NEXT: rev32 x0, x8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_rev_x_srl32_load:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: rev x8, x8
+; CHECK-GI-NEXT: lsr x0, x8, #32
+; CHECK-GI-NEXT: ret
entry:
%0 = load i32, ptr %a
%1 = zext i32 %0 to i64
diff --git a/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll b/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
index cd47fff46729f9..31a649ad64f448 100644
--- a/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
@@ -20,10 +20,10 @@ define void @fct32(i32 %arg, i64 %var) {
; CHECK-LABEL: fct32:
; CHECK: // %bb.0: // %bb
; CHECK-NEXT: adrp x8, :got:zptr32
+; CHECK-NEXT: sub w9, w0, #1
; CHECK-NEXT: ldr x8, [x8, :got_lo12:zptr32]
; CHECK-NEXT: ldr x8, [x8]
-; CHECK-NEXT: add x8, x8, w0, sxtw #2
-; CHECK-NEXT: stur w1, [x8, #-4]
+; CHECK-NEXT: str w1, [x8, w9, sxtw #2]
; CHECK-NEXT: ret
bb:
%.pre37 = load ptr, ptr @zptr32, align 8
@@ -39,10 +39,10 @@ define void @fct16(i32 %arg, i64 %var) {
; CHECK-LABEL: fct16:
; CHECK: // %bb.0: // %bb
; CHECK-NEXT: adrp x8, :got:zptr16
+; CHECK-NEXT...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/91239
More information about the llvm-commits
mailing list