[llvm] [DAGCombiner] Set shift flags during visit. (PR #91239)
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 6 10:04:57 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-systemz
@llvm/pr-subscribers-backend-x86
Author: None (goldsteinn)
<details>
<summary>Changes</summary>
- **[CodeGen] Regen some old tests; NFC**
- **[DAGCombiner] Set shift flags during visit.**
---
Patch is 6.14 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91239.diff
393 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+69-1)
- (modified) llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/aarch64-address-type-promotion.ll (+12-6)
- (modified) llvm/test/CodeGen/AArch64/addsub.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/arm64-narrow-st-merge.ll (+206)
- (modified) llvm/test/CodeGen/AArch64/arm64-rev.ll (+28-17)
- (modified) llvm/test/CodeGen/AArch64/arm64-trunc-store.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/bswap-known-bits.ll (+5-5)
- (modified) llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll (+53-29)
- (modified) llvm/test/CodeGen/AArch64/extract-subvec-combine.ll (+6-4)
- (modified) llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll (+2-1)
- (modified) llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/pull-binop-through-shift.ll (+14-14)
- (modified) llvm/test/CodeGen/AArch64/sadd_sat_vec.ll (+16-12)
- (modified) llvm/test/CodeGen/AArch64/select_const.ll (+12-9)
- (modified) llvm/test/CodeGen/AArch64/shift-logic.ll (+6-4)
- (modified) llvm/test/CodeGen/AArch64/shift-mod.ll (+8-6)
- (modified) llvm/test/CodeGen/AArch64/signed-truncation-check.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/srem-pow2.ll (+12-10)
- (modified) llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll (+8-6)
- (modified) llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll (+8-7)
- (modified) llvm/test/CodeGen/AArch64/sshl_sat.ll (+10-9)
- (modified) llvm/test/CodeGen/AArch64/ssub_sat_vec.ll (+16-12)
- (modified) llvm/test/CodeGen/AArch64/storepairsuppress.ll (+8-8)
- (modified) llvm/test/CodeGen/AArch64/tbl-loops.ll (+6-4)
- (modified) llvm/test/CodeGen/AArch64/ushl_sat.ll (+7-4)
- (modified) llvm/test/CodeGen/AArch64/vector_splat-const-shift-of-constmasked.ll (+62-53)
- (modified) llvm/test/CodeGen/AArch64/win64_vararg.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/add.v2i16.ll (+6-3)
- (modified) llvm/test/CodeGen/AMDGPU/anyext.ll (+2)
- (modified) llvm/test/CodeGen/AMDGPU/bf16-conversions.ll (+51-40)
- (modified) llvm/test/CodeGen/AMDGPU/bf16.ll (+15556-10734)
- (modified) llvm/test/CodeGen/AMDGPU/bfi_nested.ll (+2-1)
- (modified) llvm/test/CodeGen/AMDGPU/bswap.ll (+1)
- (modified) llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll (+4)
- (modified) llvm/test/CodeGen/AMDGPU/build_vector.ll (+386)
- (modified) llvm/test/CodeGen/AMDGPU/bypass-div.ll (+470-434)
- (modified) llvm/test/CodeGen/AMDGPU/calling-conventions.ll (+2-1)
- (modified) llvm/test/CodeGen/AMDGPU/carryout-selection.ll (+565-520)
- (modified) llvm/test/CodeGen/AMDGPU/combine-vload-extract.ll (+10-4)
- (modified) llvm/test/CodeGen/AMDGPU/commute-shifts.ll (+4)
- (modified) llvm/test/CodeGen/AMDGPU/computeNumSignBits-mul.ll (+2-1)
- (modified) llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll (+42-34)
- (modified) llvm/test/CodeGen/AMDGPU/ctpop16.ll (+172-132)
- (modified) llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll (+65-52)
- (modified) llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll (+6-3)
- (modified) llvm/test/CodeGen/AMDGPU/divergence-driven-sext-inreg.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/ds_read2.ll (+11-6)
- (modified) llvm/test/CodeGen/AMDGPU/extract-load-i1.ll (+14-14)
- (modified) llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll (+230-194)
- (modified) llvm/test/CodeGen/AMDGPU/fabs.f16.ll (+8-2)
- (modified) llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll (+8)
- (modified) llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll (+2-1)
- (modified) llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll (+4)
- (modified) llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll (+54-18)
- (modified) llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll (+8-7)
- (modified) llvm/test/CodeGen/AMDGPU/fneg.f16.ll (+43-13)
- (modified) llvm/test/CodeGen/AMDGPU/fneg.ll (+657)
- (modified) llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll (+20-16)
- (modified) llvm/test/CodeGen/AMDGPU/fp_to_sint.ll (+157-153)
- (modified) llvm/test/CodeGen/AMDGPU/fp_to_uint.ll (+157-153)
- (modified) llvm/test/CodeGen/AMDGPU/fp_trunc_store_fp64_to_bf16.ll (+355-287)
- (modified) llvm/test/CodeGen/AMDGPU/fshr.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/function-returns.ll (+139-78)
- (modified) llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll (+192-172)
- (modified) llvm/test/CodeGen/AMDGPU/global-saddr-load.ll (+106-36)
- (modified) llvm/test/CodeGen/AMDGPU/idot4s.ll (+27-23)
- (modified) llvm/test/CodeGen/AMDGPU/idot4u.ll (+14-12)
- (modified) llvm/test/CodeGen/AMDGPU/idot8s.ll (+878-375)
- (modified) llvm/test/CodeGen/AMDGPU/idot8u.ll (+63-43)
- (modified) llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll (+25-20)
- (modified) llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll (+44-13)
- (modified) llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll (+219-126)
- (modified) llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll (+329-305)
- (modified) llvm/test/CodeGen/AMDGPU/kernel-args.ll (+334-262)
- (modified) llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll (+332)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll (+55-3)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll (+58)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll (-32)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll (-32)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll (+3)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll (+156)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.ll (+156)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.exp.ll (+10-7)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.exp10.ll (+10-7)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.exp2.ll (+4-1)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.log.ll (+10-7)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.log10.ll (+10-7)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.log2.ll (+4-1)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.mulo.ll (+184-140)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll (+355)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll (+103-43)
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i1.ll (+2889-2607)
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i16.ll (+1402-930)
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i8.ll (+1494-1222)
- (modified) llvm/test/CodeGen/AMDGPU/load-global-i16.ll (+2772-1841)
- (modified) llvm/test/CodeGen/AMDGPU/load-lo16.ll (+42-21)
- (modified) llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll (+18-5)
- (modified) llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll (+166-138)
- (modified) llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll (+7-4)
- (modified) llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll (+12-6)
- (modified) llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll (+52-84)
- (modified) llvm/test/CodeGen/AMDGPU/max.ll (+8-4)
- (modified) llvm/test/CodeGen/AMDGPU/memory_clause.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/min.ll (+55-34)
- (modified) llvm/test/CodeGen/AMDGPU/mul_int24.ll (+18-6)
- (modified) llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll (+147-94)
- (modified) llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll (+44-33)
- (modified) llvm/test/CodeGen/AMDGPU/permute_i8.ll (+110-119)
- (modified) llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll (+60-51)
- (modified) llvm/test/CodeGen/AMDGPU/rem_i128.ll (+66-45)
- (modified) llvm/test/CodeGen/AMDGPU/saddsat.ll (+6-3)
- (modified) llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll (+28-20)
- (modified) llvm/test/CodeGen/AMDGPU/scratch-simple.ll (+7184)
- (modified) llvm/test/CodeGen/AMDGPU/sdiv.ll (+10-2)
- (modified) llvm/test/CodeGen/AMDGPU/sdiv64.ll (+391-366)
- (modified) llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll (+209-144)
- (modified) llvm/test/CodeGen/AMDGPU/sext-in-reg.ll (+2402)
- (modified) llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll (+428)
- (modified) llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll (+31)
- (modified) llvm/test/CodeGen/AMDGPU/shl.ll (+35-29)
- (modified) llvm/test/CodeGen/AMDGPU/shl.v2i16.ll (+2-6)
- (modified) llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll (+39)
- (modified) llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll (+91-43)
- (modified) llvm/test/CodeGen/AMDGPU/sra.ll (+43-29)
- (modified) llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll (+7-5)
- (modified) llvm/test/CodeGen/AMDGPU/srem64.ll (+440-407)
- (modified) llvm/test/CodeGen/AMDGPU/ssubsat.ll (+6-3)
- (modified) llvm/test/CodeGen/AMDGPU/store-private.ll (+1874)
- (modified) llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll (+2-1)
- (modified) llvm/test/CodeGen/AMDGPU/sub.v2i16.ll (+2-1)
- (modified) llvm/test/CodeGen/AMDGPU/trunc-combine.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/uaddsat.ll (+1)
- (modified) llvm/test/CodeGen/AMDGPU/udiv.ll (+206-181)
- (modified) llvm/test/CodeGen/AMDGPU/udiv64.ll (+418-380)
- (modified) llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll (+79-77)
- (modified) llvm/test/CodeGen/AMDGPU/urem64.ll (+266-239)
- (modified) llvm/test/CodeGen/AMDGPU/usubsat.ll (+3-2)
- (modified) llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll (+232-199)
- (modified) llvm/test/CodeGen/AMDGPU/wave32.ll (+174-162)
- (modified) llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll (+14-11)
- (modified) llvm/test/CodeGen/ARM/Windows/alloca.ll (+3)
- (modified) llvm/test/CodeGen/ARM/Windows/vla.ll (+4)
- (modified) llvm/test/CodeGen/ARM/and-cmpz.ll (+121)
- (modified) llvm/test/CodeGen/ARM/and-load-combine.ll (+8-4)
- (modified) llvm/test/CodeGen/ARM/bfi-chain-cse-crash.ll (+10-6)
- (modified) llvm/test/CodeGen/ARM/bfi.ll (+2)
- (modified) llvm/test/CodeGen/ARM/bfx.ll (+33-17)
- (modified) llvm/test/CodeGen/ARM/combine-movc-sub.ll (+3-3)
- (modified) llvm/test/CodeGen/ARM/demanded-bits-and.ll (+1)
- (modified) llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll (+31-14)
- (modified) llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll (+35-14)
- (modified) llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll (+36-15)
- (modified) llvm/test/CodeGen/ARM/pr36577.ll (+6-5)
- (modified) llvm/test/CodeGen/ARM/sadd_sat_plus.ll (+2)
- (modified) llvm/test/CodeGen/ARM/sbfx.ll (+28-13)
- (modified) llvm/test/CodeGen/ARM/sdiv-pow2-arm-size.ll (+52-32)
- (modified) llvm/test/CodeGen/ARM/shift-combine.ll (+157-13)
- (modified) llvm/test/CodeGen/ARM/simplifysetcc_narrow_load.ll (+4-2)
- (modified) llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll (+54-40)
- (modified) llvm/test/CodeGen/ARM/ssub_sat_plus.ll (+2)
- (modified) llvm/test/CodeGen/BPF/remove_truncate_9.ll (+3)
- (modified) llvm/test/CodeGen/Hexagon/atomicrmw-uinc-udec-wrap.ll (+58-50)
- (modified) llvm/test/CodeGen/Hexagon/isel-global-offset-alignment.ll (+3-3)
- (modified) llvm/test/CodeGen/Hexagon/vect/vect-shifts.ll (+92-75)
- (modified) llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll (+18-18)
- (modified) llvm/test/CodeGen/LoongArch/bstrins_w.ll (+1)
- (modified) llvm/test/CodeGen/LoongArch/bstrpick_d.ll (+4-2)
- (modified) llvm/test/CodeGen/LoongArch/bstrpick_w.ll (+4-2)
- (modified) llvm/test/CodeGen/LoongArch/bytepick.ll (+30-5)
- (modified) llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll (+12-7)
- (modified) llvm/test/CodeGen/LoongArch/fcopysign.ll (+3)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/and.ll (+4-1)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll (+28-14)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll (+240-220)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll (+360-180)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll (+10-2)
- (modified) llvm/test/CodeGen/LoongArch/legalicmpimm.ll (+4-1)
- (modified) llvm/test/CodeGen/LoongArch/rotl-rotr.ll (+6-3)
- (modified) llvm/test/CodeGen/LoongArch/sextw-removal.ll (+54-20)
- (modified) llvm/test/CodeGen/Mips/atomic.ll (+21-7)
- (modified) llvm/test/CodeGen/Mips/cconv/illegal-vectors.ll (+272-148)
- (modified) llvm/test/CodeGen/Mips/cins.ll (+34-16)
- (modified) llvm/test/CodeGen/Mips/dins.ll (+19-9)
- (modified) llvm/test/CodeGen/Mips/fabs.ll (+11-6)
- (modified) llvm/test/CodeGen/Mips/fcopysign-f32-f64.ll (+76-28)
- (modified) llvm/test/CodeGen/Mips/fcopysign.ll (+88-31)
- (modified) llvm/test/CodeGen/Mips/funnel-shift-rot.ll (+4-4)
- (modified) llvm/test/CodeGen/Mips/funnel-shift.ll (+4-4)
- (modified) llvm/test/CodeGen/Mips/llvm-ir/abs.ll (+107-12)
- (modified) llvm/test/CodeGen/Mips/llvm-ir/nan-fp-attr.ll (+13-18)
- (modified) llvm/test/CodeGen/Mips/load-store-left-right.ll (+37-41)
- (modified) llvm/test/CodeGen/Mips/mips64-f128.ll (+10-7)
- (modified) llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll (+18-14)
- (modified) llvm/test/CodeGen/Mips/unalignedload.ll (+6-6)
- (modified) llvm/test/CodeGen/NVPTX/atomics-sm70.ll (+4-4)
- (modified) llvm/test/CodeGen/NVPTX/i8x4-instructions.ll (+12-9)
- (modified) llvm/test/CodeGen/NVPTX/lower-byval-args.ll (+217)
- (modified) llvm/test/CodeGen/NVPTX/mulwide.ll (+4-20)
- (modified) llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll (+3)
- (modified) llvm/test/CodeGen/PowerPC/coalesce-ext.ll (+10-4)
- (modified) llvm/test/CodeGen/PowerPC/extsh.ll (+1)
- (modified) llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll (+140-141)
- (modified) llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll (+16-13)
- (modified) llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll (+26-12)
- (modified) llvm/test/CodeGen/PowerPC/pr38087.ll (+4-1)
- (modified) llvm/test/CodeGen/PowerPC/pr45432.ll (+2-2)
- (modified) llvm/test/CodeGen/PowerPC/rlwinm.ll (+2-1)
- (modified) llvm/test/CodeGen/PowerPC/sext-vector-inreg.ll (+4-1)
- (modified) llvm/test/CodeGen/PowerPC/shl_sext.ll (+1)
- (modified) llvm/test/CodeGen/PowerPC/sms-phi-3.ll (+15-27)
- (modified) llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll (+2-2)
- (modified) llvm/test/CodeGen/PowerPC/vec-itofp.ll (+27-11)
- (modified) llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll (+31-18)
- (modified) llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll (+61-34)
- (modified) llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll (+20-7)
- (modified) llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll (+58-31)
- (modified) llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll (+9-2)
- (modified) llvm/test/CodeGen/RISCV/add-before-shl.ll (+16-20)
- (modified) llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll (+18-6)
- (modified) llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll (+128)
- (modified) llvm/test/CodeGen/RISCV/atomic-rmw.ll (+1682-1104)
- (modified) llvm/test/CodeGen/RISCV/atomic-signext.ll (+136-92)
- (modified) llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll (+28-28)
- (modified) llvm/test/CodeGen/RISCV/bittest.ll (+6-8)
- (modified) llvm/test/CodeGen/RISCV/bswap-bitreverse.ll (+26-22)
- (modified) llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll (+8-4)
- (modified) llvm/test/CodeGen/RISCV/div-by-constant.ll (+72-43)
- (modified) llvm/test/CodeGen/RISCV/div.ll (+15-9)
- (modified) llvm/test/CodeGen/RISCV/float-intrinsics.ll (+8-4)
- (modified) llvm/test/CodeGen/RISCV/pr65025.ll (+2-1)
- (modified) llvm/test/CodeGen/RISCV/rem.ll (+16-12)
- (modified) llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll (+19-11)
- (modified) llvm/test/CodeGen/RISCV/rotl-rotr.ll (+4-2)
- (modified) llvm/test/CodeGen/RISCV/rv64-legal-i32/div.ll (+6-3)
- (modified) llvm/test/CodeGen/RISCV/rv64-legal-i32/rem.ll (+8-6)
- (modified) llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll (+24-25)
- (modified) llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll (+16-11)
- (modified) llvm/test/CodeGen/RISCV/rv64i-tricky-shifts.ll (+4-2)
- (modified) llvm/test/CodeGen/RISCV/rv64xtheadbb.ll (+14-9)
- (modified) llvm/test/CodeGen/RISCV/rv64zba.ll (+25-26)
- (modified) llvm/test/CodeGen/RISCV/rv64zbb.ll (+18-9)
- (modified) llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll (+5-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll (+78)
- (modified) llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll (+78)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll (+28-12)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll (+635-237)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll (-26)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll (+57-22)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll (+20-20)
- (modified) llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll (+10-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll (+4-3)
- (modified) llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll (+4-3)
- (modified) llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i64.ll (+10-5)
- (modified) llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll (+20-18)
- (modified) llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll (+939-350)
- (modified) llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll (+981-357)
- (modified) llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll (+9-13)
- (modified) llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll (+32-16)
- (modified) llvm/test/CodeGen/RISCV/shifts.ll (+60-60)
- (modified) llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll (+55-55)
- (modified) llvm/test/CodeGen/RISCV/split-urem-by-constant.ll (+41-41)
- (modified) llvm/test/CodeGen/RISCV/srem-lkk.ll (+16-12)
- (modified) llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll (+120-106)
- (modified) llvm/test/CodeGen/RISCV/srem-vector-lkk.ll (+34-22)
- (modified) llvm/test/CodeGen/RISCV/unaligned-load-store.ll (+46-32)
- (modified) llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll (+4-2)
- (modified) llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll (+1304-950)
- (modified) llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll (+1636-1467)
- (modified) llvm/test/CodeGen/SystemZ/int-abs-01.ll (+53-24)
- (modified) llvm/test/CodeGen/SystemZ/int-cmp-44.ll (+306-161)
- (modified) llvm/test/CodeGen/SystemZ/int-mul-10.ll (+3-38)
- (modified) llvm/test/CodeGen/SystemZ/int-neg-02.ll (+92-34)
- (modified) llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll (+24-7)
- (modified) llvm/test/CodeGen/Thumb/shift-and.ll (+15-10)
- (modified) llvm/test/CodeGen/Thumb/srem-seteq-illegal-types.ll (+12-7)
- (modified) llvm/test/CodeGen/Thumb/umul_fix_sat.ll (+6-2)
- (modified) llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-disabled-in-loloops.ll (+3-2)
- (modified) llvm/test/CodeGen/Thumb2/bfx.ll (+14-6)
- (modified) llvm/test/CodeGen/Thumb2/mve-float16regloops.ll (+2-2)
- (modified) llvm/test/CodeGen/Thumb2/mve-float32regloops.ll (+3-3)
- (modified) llvm/test/CodeGen/Thumb2/mve-gather-increment.ll (+153-141)
- (modified) llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll (+12-10)
- (modified) llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll (+15-13)
- (modified) llvm/test/CodeGen/Thumb2/shift_parts.ll (+10-4)
- (modified) llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll (+9-6)
- (modified) llvm/test/CodeGen/VE/Scalar/bitreverse.ll (+11-2)
- (modified) llvm/test/CodeGen/WebAssembly/conv.ll (+3)
- (modified) llvm/test/CodeGen/WebAssembly/pr47375.ll (+5-3)
- (modified) llvm/test/CodeGen/WebAssembly/simd-bitmask.ll (+13-13)
- (modified) llvm/test/CodeGen/WebAssembly/simd-pr61780.ll (+2-2)
- (modified) llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll (+5)
- (modified) llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll (+319-319)
- (modified) llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll (+16-5)
- (modified) llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/add-ext.ll (+2-1)
- (modified) llvm/test/CodeGen/X86/addr-mode-matcher-2.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/addr-mode-matcher-3.ll (+5-4)
- (modified) llvm/test/CodeGen/X86/atomic-bit-test.ll (+19-16)
- (modified) llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll (+12-3)
- (modified) llvm/test/CodeGen/X86/atomic-rm-bit-test.ll (+36-36)
- (modified) llvm/test/CodeGen/X86/avx512-calling-conv.ll (+217-209)
- (modified) llvm/test/CodeGen/X86/bfloat.ll (+59-53)
- (modified) llvm/test/CodeGen/X86/bitreverse.ll (+43-36)
- (modified) llvm/test/CodeGen/X86/bool-math.ll (+5-3)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 05ab6e2e48206f..a182fbdde4f17b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9738,6 +9738,64 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
return SDValue();
}
+static SDValue setShiftFlags(SelectionDAG &DAG, const SDLoc &DL, SDNode *N) {
+ unsigned Opc = N->getOpcode();
+ assert((Opc == ISD::SHL || Opc == ISD::SRA || Opc == ISD::SRL) &&
+ "Unknown shift opcode");
+ SDNodeFlags Flags = N->getFlags();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ // Check if we already have the flags.
+ if (Opc == ISD::SHL) {
+ if (Flags.hasNoSignedWrap() && Flags.hasNoUnsignedWrap())
+ return SDValue();
+
+ } else {
+ if (Flags.hasExact())
+ return SDValue();
+
+ // shr (shl X, Y), Y
+ if (sd_match(N0, m_Shl(m_Value(), m_Specific(N1)))) {
+ Flags.setExact(true);
+ return DAG.getNode(Opc, DL, VT, N0, N1, Flags);
+ }
+ }
+
+ // Compute what we know about shift count.
+ KnownBits KnownCnt = DAG.computeKnownBits(N1);
+ // Compute what we know about shift amt.
+ KnownBits KnownAmt = DAG.computeKnownBits(N0);
+ APInt MaxCnt = KnownCnt.getMaxValue();
+ bool Changed = false;
+ if (Opc == ISD::SHL) {
+ // If we have as many leading zeros as the maximum shift cnt we have nuw.
+ if (!Flags.hasNoUnsignedWrap() &&
+ MaxCnt.ule(KnownAmt.countMinLeadingZeros())) {
+ Flags.setNoUnsignedWrap(true);
+ Changed = true;
+ }
+ // If we have more sign bits than maximum shift cnt we have nsw.
+ if (!Flags.hasNoSignedWrap()) {
+ if (MaxCnt.ult(KnownAmt.countMinSignBits()) ||
+ MaxCnt.ult(DAG.ComputeNumSignBits(N0))) {
+ Flags.setNoSignedWrap(true);
+ Changed = true;
+ }
+ }
+ } else {
+ // If we have at least as many trailing zeros as maximum count then we have
+ // exact.
+ Changed = MaxCnt.ule(KnownAmt.countMinTrailingZeros());
+ Flags.setExact(Changed);
+ }
+
+ if (Changed)
+ return DAG.getNode(Opc, DL, VT, N0, N1, Flags);
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -9745,6 +9803,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return V;
SDLoc DL(N);
+ if (SDValue V = setShiftFlags(DAG, DL, N))
+ return V;
+
EVT VT = N0.getValueType();
EVT ShiftVT = N1.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -9895,7 +9956,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
LHSC.getZExtValue() <= RHSC.getZExtValue();
};
-
+
// fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
// fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
if (N0->getFlags().hasExact()) {
@@ -10188,6 +10249,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
return V;
SDLoc DL(N);
+ if (SDValue V = setShiftFlags(DAG, DL, N))
+ return V;
+
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -10389,6 +10453,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return V;
SDLoc DL(N);
+ if (SDValue V = setShiftFlags(DAG, DL, N))
+ return V;
EVT VT = N0.getValueType();
EVT ShiftVT = N1.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -10638,6 +10704,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return SDValue();
}
+
+
SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
diff --git a/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll b/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll
index 71f4da2b465c13..26f41f4d98c5cc 100644
--- a/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll
+++ b/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll
@@ -7,8 +7,8 @@
define <vscale x 4 x i32> @sext_inreg(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sext_inreg:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sxth z0.s, p0/m, z0.s
+; CHECK-NEXT: lsl z0.s, z0.s, #16
+; CHECK-NEXT: asr z0.s, z0.s, #16
; CHECK-NEXT: ret
%in = insertelement <vscale x 4 x i32> undef, i32 16, i32 0
%splat = shufflevector <vscale x 4 x i32> %in, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion.ll b/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion.ll
index d8280dadc550ea..da29a480959394 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -o - | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64"
@@ -5,13 +6,18 @@ target triple = "arm64-apple-macosx10.9"
; Check that sexts get promoted above adds.
define void @foo(ptr nocapture %a, i32 %i) {
+; CHECK-LABEL: foo:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: add x9, x8, #1
+; CHECK-NEXT: add x8, x8, #2
+; CHECK-NEXT: ldr w9, [x0, x9, lsl #2]
+; CHECK-NEXT: ldr w8, [x0, x8, lsl #2]
+; CHECK-NEXT: add w8, w8, w9
+; CHECK-NEXT: str w8, [x0, w1, sxtw #2]
+; CHECK-NEXT: ret
entry:
-; CHECK-LABEL: _foo:
-; CHECK: add
-; CHECK-NEXT: ldp
-; CHECK-NEXT: add
-; CHECK-NEXT: str
-; CHECK-NEXT: ret
%add = add nsw i32 %i, 1
%idxprom = sext i32 %add to i64
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom
diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll
index 20215fe9146924..fed1747c23e1c9 100644
--- a/llvm/test/CodeGen/AArch64/addsub.ll
+++ b/llvm/test/CodeGen/AArch64/addsub.ll
@@ -376,7 +376,7 @@ define i1 @uadd_add(i8 %a, i8 %b, ptr %p) {
; CHECK-NEXT: mov w8, #255 // =0xff
; CHECK-NEXT: bic w8, w8, w0
; CHECK-NEXT: add w8, w8, w1, uxtb
-; CHECK-NEXT: lsr w0, w8, #8
+; CHECK-NEXT: ubfx w0, w8, #8, #1
; CHECK-NEXT: add w8, w8, #1
; CHECK-NEXT: strb w8, [x2]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-narrow-st-merge.ll b/llvm/test/CodeGen/AArch64/arm64-narrow-st-merge.ll
index 81c3195584701c..033ac301d7abe1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-narrow-st-merge.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-narrow-st-merge.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple aarch64 -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -mtriple aarch64 -mattr=+strict-align -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-STRICT
@@ -7,6 +8,23 @@
; CHECK-STRICT: strh wzr
; CHECK-STRICT: strh wzr
define void @Strh_zero(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Strh_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: strh wzr, [x0, w1, sxtw #1]
+; CHECK-NEXT: add x8, x8, #1
+; CHECK-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Strh_zero:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: strh wzr, [x0, w1, sxtw #1]
+; CHECK-STRICT-NEXT: add x8, x8, #1
+; CHECK-STRICT-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-STRICT-NEXT: ret
entry:
%idxprom = sext i32 %n to i64
%arrayidx = getelementptr inbounds i16, ptr %P, i64 %idxprom
@@ -26,6 +44,31 @@ entry:
; CHECK-STRICT: strh wzr
; CHECK-STRICT: strh wzr
define void @Strh_zero_4(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Strh_zero_4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: strh wzr, [x0, w1, sxtw #1]
+; CHECK-NEXT: add x9, x8, #1
+; CHECK-NEXT: add x10, x8, #2
+; CHECK-NEXT: add x8, x8, #3
+; CHECK-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-NEXT: strh wzr, [x0, x10, lsl #1]
+; CHECK-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Strh_zero_4:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: strh wzr, [x0, w1, sxtw #1]
+; CHECK-STRICT-NEXT: add x9, x8, #1
+; CHECK-STRICT-NEXT: add x10, x8, #2
+; CHECK-STRICT-NEXT: add x8, x8, #3
+; CHECK-STRICT-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-STRICT-NEXT: strh wzr, [x0, x10, lsl #1]
+; CHECK-STRICT-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-STRICT-NEXT: ret
entry:
%idxprom = sext i32 %n to i64
%arrayidx = getelementptr inbounds i16, ptr %P, i64 %idxprom
@@ -50,6 +93,23 @@ entry:
; CHECK-STRICT-LABEL: Strw_zero
; CHECK-STRICT: stp wzr, wzr
define void @Strw_zero(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Strw_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: str wzr, [x0, w1, sxtw #2]
+; CHECK-NEXT: add x8, x8, #1
+; CHECK-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Strw_zero:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: str wzr, [x0, w1, sxtw #2]
+; CHECK-STRICT-NEXT: add x8, x8, #1
+; CHECK-STRICT-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-STRICT-NEXT: ret
entry:
%idxprom = sext i32 %n to i64
%arrayidx = getelementptr inbounds i32, ptr %P, i64 %idxprom
@@ -64,6 +124,23 @@ entry:
; CHECK-LABEL: Strw_zero_nonzero
; CHECK: stp wzr, w1
define void @Strw_zero_nonzero(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Strw_zero_nonzero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: str wzr, [x0, w1, sxtw #2]
+; CHECK-NEXT: add x8, x8, #1
+; CHECK-NEXT: str w1, [x0, x8, lsl #2]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Strw_zero_nonzero:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: str wzr, [x0, w1, sxtw #2]
+; CHECK-STRICT-NEXT: add x8, x8, #1
+; CHECK-STRICT-NEXT: str w1, [x0, x8, lsl #2]
+; CHECK-STRICT-NEXT: ret
entry:
%idxprom = sext i32 %n to i64
%arrayidx = getelementptr inbounds i32, ptr %P, i64 %idxprom
@@ -81,6 +158,31 @@ entry:
; CHECK-STRICT: stp wzr, wzr
; CHECK-STRICT: stp wzr, wzr
define void @Strw_zero_4(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Strw_zero_4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: str wzr, [x0, w1, sxtw #2]
+; CHECK-NEXT: add x9, x8, #1
+; CHECK-NEXT: add x10, x8, #2
+; CHECK-NEXT: add x8, x8, #3
+; CHECK-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-NEXT: str wzr, [x0, x10, lsl #2]
+; CHECK-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Strw_zero_4:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: str wzr, [x0, w1, sxtw #2]
+; CHECK-STRICT-NEXT: add x9, x8, #1
+; CHECK-STRICT-NEXT: add x10, x8, #2
+; CHECK-STRICT-NEXT: add x8, x8, #3
+; CHECK-STRICT-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-STRICT-NEXT: str wzr, [x0, x10, lsl #2]
+; CHECK-STRICT-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-STRICT-NEXT: ret
entry:
%idxprom = sext i32 %n to i64
%arrayidx = getelementptr inbounds i32, ptr %P, i64 %idxprom
@@ -106,6 +208,18 @@ entry:
; CHECK-STRICT: sturb wzr
; CHECK-STRICT: sturb wzr
define void @Sturb_zero(ptr nocapture %P, i32 %n) #0 {
+; CHECK-LABEL: Sturb_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add x8, x0, w1, sxtw
+; CHECK-NEXT: sturh wzr, [x8, #-2]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Sturb_zero:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: add x8, x0, w1, sxtw
+; CHECK-STRICT-NEXT: sturb wzr, [x8, #-2]
+; CHECK-STRICT-NEXT: sturb wzr, [x8, #-1]
+; CHECK-STRICT-NEXT: ret
entry:
%sub = add nsw i32 %n, -2
%idxprom = sext i32 %sub to i64
@@ -124,6 +238,25 @@ entry:
; CHECK-STRICT: sturh wzr
; CHECK-STRICT: sturh wzr
define void @Sturh_zero(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Sturh_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: sub x9, x8, #2
+; CHECK-NEXT: sub x8, x8, #3
+; CHECK-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Sturh_zero:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: sub x9, x8, #2
+; CHECK-STRICT-NEXT: sub x8, x8, #3
+; CHECK-STRICT-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-STRICT-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-STRICT-NEXT: ret
entry:
%sub = add nsw i32 %n, -2
%idxprom = sext i32 %sub to i64
@@ -144,6 +277,33 @@ entry:
; CHECK-STRICT: sturh wzr
; CHECK-STRICT: sturh wzr
define void @Sturh_zero_4(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Sturh_zero_4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: sub x9, x8, #3
+; CHECK-NEXT: sub x10, x8, #4
+; CHECK-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-NEXT: sub x9, x8, #2
+; CHECK-NEXT: sub x8, x8, #1
+; CHECK-NEXT: strh wzr, [x0, x10, lsl #1]
+; CHECK-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Sturh_zero_4:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: sub x9, x8, #3
+; CHECK-STRICT-NEXT: sub x10, x8, #4
+; CHECK-STRICT-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-STRICT-NEXT: sub x9, x8, #2
+; CHECK-STRICT-NEXT: sub x8, x8, #1
+; CHECK-STRICT-NEXT: strh wzr, [x0, x10, lsl #1]
+; CHECK-STRICT-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-STRICT-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-STRICT-NEXT: ret
entry:
%sub = add nsw i32 %n, -3
%idxprom = sext i32 %sub to i64
@@ -169,6 +329,25 @@ entry:
; CHECK-STRICT-LABEL: Sturw_zero
; CHECK-STRICT: stp wzr, wzr
define void @Sturw_zero(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Sturw_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: sub x9, x8, #3
+; CHECK-NEXT: sub x8, x8, #4
+; CHECK-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Sturw_zero:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: sub x9, x8, #3
+; CHECK-STRICT-NEXT: sub x8, x8, #4
+; CHECK-STRICT-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-STRICT-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-STRICT-NEXT: ret
entry:
%sub = add nsw i32 %n, -3
%idxprom = sext i32 %sub to i64
@@ -187,6 +366,33 @@ entry:
; CHECK-STRICT: stp wzr, wzr
; CHECK-STRICT: stp wzr, wzr
define void @Sturw_zero_4(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Sturw_zero_4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: sub x9, x8, #3
+; CHECK-NEXT: sub x10, x8, #4
+; CHECK-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-NEXT: sub x9, x8, #2
+; CHECK-NEXT: sub x8, x8, #1
+; CHECK-NEXT: str wzr, [x0, x10, lsl #2]
+; CHECK-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Sturw_zero_4:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: sub x9, x8, #3
+; CHECK-STRICT-NEXT: sub x10, x8, #4
+; CHECK-STRICT-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-STRICT-NEXT: sub x9, x8, #2
+; CHECK-STRICT-NEXT: sub x8, x8, #1
+; CHECK-STRICT-NEXT: str wzr, [x0, x10, lsl #2]
+; CHECK-STRICT-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-STRICT-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-STRICT-NEXT: ret
entry:
%sub = add nsw i32 %n, -3
%idxprom = sext i32 %sub to i64
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index f548a0e01feee6..19c0c8940b92b3 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -27,8 +27,8 @@ entry:
define i32 @test_rev_w_srl16(i16 %a) {
; CHECK-SD-LABEL: test_rev_w_srl16:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev w8, w0
-; CHECK-SD-NEXT: lsr w0, w8, #16
+; CHECK-SD-NEXT: and w8, w0, #0xffff
+; CHECK-SD-NEXT: rev16 w0, w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_rev_w_srl16:
@@ -45,12 +45,18 @@ entry:
}
define i32 @test_rev_w_srl16_load(ptr %a) {
-; CHECK-LABEL: test_rev_w_srl16_load:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: rev w8, w8
-; CHECK-NEXT: lsr w0, w8, #16
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_rev_w_srl16_load:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldrh w8, [x0]
+; CHECK-SD-NEXT: rev16 w0, w8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_rev_w_srl16_load:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldrh w8, [x0]
+; CHECK-GI-NEXT: rev w8, w8
+; CHECK-GI-NEXT: lsr w0, w8, #16
+; CHECK-GI-NEXT: ret
entry:
%0 = load i16, ptr %a
%1 = zext i16 %0 to i32
@@ -88,9 +94,8 @@ entry:
define i64 @test_rev_x_srl32(i32 %a) {
; CHECK-SD-LABEL: test_rev_x_srl32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-SD-NEXT: rev x8, x0
-; CHECK-SD-NEXT: lsr x0, x8, #32
+; CHECK-SD-NEXT: mov w8, w0
+; CHECK-SD-NEXT: rev32 x0, x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_rev_x_srl32:
@@ -107,12 +112,18 @@ entry:
}
define i64 @test_rev_x_srl32_load(ptr %a) {
-; CHECK-LABEL: test_rev_x_srl32_load:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: lsr x0, x8, #32
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_rev_x_srl32_load:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldr w8, [x0]
+; CHECK-SD-NEXT: rev32 x0, x8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_rev_x_srl32_load:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: rev x8, x8
+; CHECK-GI-NEXT: lsr x0, x8, #32
+; CHECK-GI-NEXT: ret
entry:
%0 = load i32, ptr %a
%1 = zext i32 %0 to i64
diff --git a/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll b/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
index cd47fff46729f9..31a649ad64f448 100644
--- a/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
@@ -20,10 +20,10 @@ define void @fct32(i32 %arg, i64 %var) {
; CHECK-LABEL: fct32:
; CHECK: // %bb.0: // %bb
; CHECK-NEXT: adrp x8, :got:zptr32
+; CHECK-NEXT: sub w9, w0, #1
; CHECK-NEXT: ldr x8, [x8, :got_lo12:zptr32]
; CHECK-NEXT: ldr x8, [x8]
-; CHECK-NEXT: add x8, x8, w0, sxtw #2
-; CHECK-NEXT: stur w1, [x8, #-4]
+; CHECK-NEXT: str w1, [x8, w9, sxtw #2]
; CHECK-NEXT: ret
bb:
%.pre37 = load ptr, ptr @zptr32, align 8
@@ -39,10 +39,10 @@ define void @fct16(i32 %arg, i64 %var) {
; CHECK-LABEL: fct16:
; CHECK: // %bb.0: // %bb
; CHECK-NEXT: adrp x8, :got:zptr16
+; CHECK-NEXT...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/91239
More information about the llvm-commits
mailing list