[PATCH] D88396: [X86] Replace movaps with movups when avx is enabled.

LuoYuanke via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 28 01:35:59 PDT 2020


LuoYuanke created this revision.
Herald added subscribers: llvm-commits, nikic, arphaman, hiraditya, mgorny.
Herald added a project: LLVM.
LuoYuanke requested review of this revision.

The performance of movaps and movups is the same if the address is aligned.
However, if the address is not aligned, movaps raises an exception while
movups can still run.

Change-Id: I85ab9749013d7e1abb237e03bc22eeacfd37836a


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D88396

Files:
  llvm/lib/Target/X86/CMakeLists.txt
  llvm/lib/Target/X86/X86.h
  llvm/lib/Target/X86/X86MovapsToMovups.cpp
  llvm/lib/Target/X86/X86TargetMachine.cpp
  llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
  llvm/test/CodeGen/X86/2012-04-26-sdglue.ll
  llvm/test/CodeGen/X86/2012-1-10-buildvector.ll
  llvm/test/CodeGen/X86/GlobalISel/memop-vec.ll
  llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
  llvm/test/CodeGen/X86/O0-pipeline.ll
  llvm/test/CodeGen/X86/SwizzleShuff.ll
  llvm/test/CodeGen/X86/anyregcc.ll
  llvm/test/CodeGen/X86/arg-copy-elide-win64.ll
  llvm/test/CodeGen/X86/atomic-non-integer.ll
  llvm/test/CodeGen/X86/avx-basic.ll
  llvm/test/CodeGen/X86/avx-intel-ocl.ll
  llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
  llvm/test/CodeGen/X86/avx-load-store.ll
  llvm/test/CodeGen/X86/avx-unpack.ll
  llvm/test/CodeGen/X86/avx-varargs-x86_64.ll
  llvm/test/CodeGen/X86/avx-vbroadcast.ll
  llvm/test/CodeGen/X86/avx-vbroadcastf128.ll
  llvm/test/CodeGen/X86/avx-vextractf128.ll
  llvm/test/CodeGen/X86/avx-vzeroupper.ll
  llvm/test/CodeGen/X86/avx-win64.ll
  llvm/test/CodeGen/X86/avx1-logical-load-folding.ll
  llvm/test/CodeGen/X86/avx2-conversions.ll
  llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
  llvm/test/CodeGen/X86/avx2-masked-gather.ll
  llvm/test/CodeGen/X86/avx2-vbroadcast.ll
  llvm/test/CodeGen/X86/avx2-vbroadcasti128.ll
  llvm/test/CodeGen/X86/avx2-vperm.ll
  llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
  llvm/test/CodeGen/X86/avx512-bugfix-25270.ll
  llvm/test/CodeGen/X86/avx512-build-vector.ll
  llvm/test/CodeGen/X86/avx512-calling-conv.ll
  llvm/test/CodeGen/X86/avx512-cmp-mask.ll
  llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
  llvm/test/CodeGen/X86/avx512-extract-subvector.ll
  llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll
  llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
  llvm/test/CodeGen/X86/avx512-insert-extract.ll
  llvm/test/CodeGen/X86/avx512-intel-ocl.ll
  llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
  llvm/test/CodeGen/X86/avx512-intrinsics.ll
  llvm/test/CodeGen/X86/avx512-mask-op.ll
  llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll
  llvm/test/CodeGen/X86/avx512-mov.ll
  llvm/test/CodeGen/X86/avx512-regcall-Mask.ll
  llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
  llvm/test/CodeGen/X86/avx512-rotate.ll
  llvm/test/CodeGen/X86/avx512-select.ll
  llvm/test/CodeGen/X86/avx512-shuffles/in_lane_permute.ll
  llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
  llvm/test/CodeGen/X86/avx512-shuffles/permute.ll
  llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll
  llvm/test/CodeGen/X86/avx512-vbroadcast.ll
  llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll
  llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
  llvm/test/CodeGen/X86/avx512vl-mov.ll
  llvm/test/CodeGen/X86/avx512vl-vbroadcast.ll
  llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
  llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll
  llvm/test/CodeGen/X86/avx512vp2intersect-intrinsics.ll
  llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
  llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
  llvm/test/CodeGen/X86/break-false-dep.ll
  llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
  llvm/test/CodeGen/X86/bswap-vector.ll
  llvm/test/CodeGen/X86/bug37521.ll
  llvm/test/CodeGen/X86/buildvec-insertvec.ll
  llvm/test/CodeGen/X86/cast-vsel.ll
  llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
  llvm/test/CodeGen/X86/combine-abs.ll
  llvm/test/CodeGen/X86/combine-add-ssat.ll
  llvm/test/CodeGen/X86/combine-add-usat.ll
  llvm/test/CodeGen/X86/combine-bitselect.ll
  llvm/test/CodeGen/X86/combine-concatvectors.ll
  llvm/test/CodeGen/X86/combine-fabs.ll
  llvm/test/CodeGen/X86/combine-sdiv.ll
  llvm/test/CodeGen/X86/combine-sub-ssat.ll
  llvm/test/CodeGen/X86/combine-sub-usat.ll
  llvm/test/CodeGen/X86/combine-udiv.ll
  llvm/test/CodeGen/X86/commute-fcmp.ll
  llvm/test/CodeGen/X86/concat-cast.ll
  llvm/test/CodeGen/X86/dynamic-allocas-VLAs.ll
  llvm/test/CodeGen/X86/extract-concat.ll
  llvm/test/CodeGen/X86/extractelement-fp.ll
  llvm/test/CodeGen/X86/extractelement-index.ll
  llvm/test/CodeGen/X86/extractelement-load.ll
  llvm/test/CodeGen/X86/fast-isel-store.ll
  llvm/test/CodeGen/X86/fast-isel-vecload.ll
  llvm/test/CodeGen/X86/fma-commute-x86.ll
  llvm/test/CodeGen/X86/fma-intrinsics-canonical.ll
  llvm/test/CodeGen/X86/fma-intrinsics-fast-isel.ll
  llvm/test/CodeGen/X86/fma-intrinsics-x86-upgrade.ll
  llvm/test/CodeGen/X86/fma-intrinsics-x86.ll
  llvm/test/CodeGen/X86/fma.ll
  llvm/test/CodeGen/X86/fma4-commute-x86.ll
  llvm/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll
  llvm/test/CodeGen/X86/fma_patterns.ll
  llvm/test/CodeGen/X86/fma_patterns_wide.ll
  llvm/test/CodeGen/X86/fold-load-unops.ll
  llvm/test/CodeGen/X86/fold-vector-sext-zext.ll
  llvm/test/CodeGen/X86/fold-vector-trunc-sitofp.ll
  llvm/test/CodeGen/X86/fp-round.ll
  llvm/test/CodeGen/X86/fp128-cast-strict.ll
  llvm/test/CodeGen/X86/fp128-cast.ll
  llvm/test/CodeGen/X86/fp128-i128.ll
  llvm/test/CodeGen/X86/gpr-to-mask.ll
  llvm/test/CodeGen/X86/haddsub-shuf-undef-operand.ll
  llvm/test/CodeGen/X86/half.ll
  llvm/test/CodeGen/X86/i64-mem-copy.ll
  llvm/test/CodeGen/X86/insert-into-constant-vector.ll
  llvm/test/CodeGen/X86/known-bits-vector.ll
  llvm/test/CodeGen/X86/known-signbits-vector.ll
  llvm/test/CodeGen/X86/legalize-vaarg.ll
  llvm/test/CodeGen/X86/load-partial.ll
  llvm/test/CodeGen/X86/masked_compressstore.ll
  llvm/test/CodeGen/X86/masked_gather_scatter.ll
  llvm/test/CodeGen/X86/masked_load.ll
  llvm/test/CodeGen/X86/masked_store_trunc.ll
  llvm/test/CodeGen/X86/memset-nonzero.ll
  llvm/test/CodeGen/X86/memset-sse-stack-realignment.ll
  llvm/test/CodeGen/X86/memset-zero.ll
  llvm/test/CodeGen/X86/memset.ll
  llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
  llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll
  llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll
  llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll
  llvm/test/CodeGen/X86/merge-store-constants.ll
  llvm/test/CodeGen/X86/min-legal-vector-width.ll
  llvm/test/CodeGen/X86/nontemporal-loads-2.ll
  llvm/test/CodeGen/X86/oddshuffles.ll
  llvm/test/CodeGen/X86/oddsubvector.ll
  llvm/test/CodeGen/X86/opt-pipeline.ll
  llvm/test/CodeGen/X86/paddus.ll
  llvm/test/CodeGen/X86/pmul.ll
  llvm/test/CodeGen/X86/pr11334.ll
  llvm/test/CodeGen/X86/pr22774.ll
  llvm/test/CodeGen/X86/pr29112.ll
  llvm/test/CodeGen/X86/pr30290.ll
  llvm/test/CodeGen/X86/pr30430.ll
  llvm/test/CodeGen/X86/pr31956.ll
  llvm/test/CodeGen/X86/pr32368.ll
  llvm/test/CodeGen/X86/pr34592.ll
  llvm/test/CodeGen/X86/pr34657.ll
  llvm/test/CodeGen/X86/pr38639.ll
  llvm/test/CodeGen/X86/pr38738.ll
  llvm/test/CodeGen/X86/pr40811.ll
  llvm/test/CodeGen/X86/pr43866.ll
  llvm/test/CodeGen/X86/pr44140.ll
  llvm/test/CodeGen/X86/pr45563-2.ll
  llvm/test/CodeGen/X86/pr46532.ll
  llvm/test/CodeGen/X86/pr46820.ll
  llvm/test/CodeGen/X86/psubus.ll
  llvm/test/CodeGen/X86/recip-fastmath.ll
  llvm/test/CodeGen/X86/recip-fastmath2.ll
  llvm/test/CodeGen/X86/reduce-trunc-shl.ll
  llvm/test/CodeGen/X86/sadd_sat_vec.ll
  llvm/test/CodeGen/X86/sandybridge-loads.ll
  llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
  llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
  llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll
  llvm/test/CodeGen/X86/shuffle-strided-with-offset-256.ll
  llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll
  llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
  llvm/test/CodeGen/X86/splat-const.ll
  llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
  llvm/test/CodeGen/X86/sqrt-fastmath.ll
  llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll
  llvm/test/CodeGen/X86/sse-fsignum.ll
  llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
  llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
  llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
  llvm/test/CodeGen/X86/sse2.ll
  llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
  llvm/test/CodeGen/X86/sse41.ll
  llvm/test/CodeGen/X86/ssub_sat_vec.ll
  llvm/test/CodeGen/X86/stack-folding-avx512bf16.ll
  llvm/test/CodeGen/X86/stack-folding-avx512vp2intersect.ll
  llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
  llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
  llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
  llvm/test/CodeGen/X86/stack-folding-int-avx1.ll
  llvm/test/CodeGen/X86/stack-folding-int-avx2.ll
  llvm/test/CodeGen/X86/stack-folding-int-avx512.ll
  llvm/test/CodeGen/X86/stack-folding-int-avx512vl.ll
  llvm/test/CodeGen/X86/stack-folding-xop.ll
  llvm/test/CodeGen/X86/statepoint-no-realign-stack.ll
  llvm/test/CodeGen/X86/subvector-broadcast.ll
  llvm/test/CodeGen/X86/swap.ll
  llvm/test/CodeGen/X86/swizzle-avx2.ll
  llvm/test/CodeGen/X86/unaligned-32-byte-memops.ll
  llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
  llvm/test/CodeGen/X86/urem-seteq-vec-splat.ll
  llvm/test/CodeGen/X86/v8i1-masks.ll
  llvm/test/CodeGen/X86/vaargs.ll
  llvm/test/CodeGen/X86/var-permute-512.ll
  llvm/test/CodeGen/X86/vec-libcalls.ll
  llvm/test/CodeGen/X86/vec-strict-cmp-128.ll
  llvm/test/CodeGen/X86/vec-strict-cmp-256.ll
  llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
  llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
  llvm/test/CodeGen/X86/vec_cast3.ll
  llvm/test/CodeGen/X86/vec_extract-avx.ll
  llvm/test/CodeGen/X86/vec_fabs.ll
  llvm/test/CodeGen/X86/vec_floor.ll
  llvm/test/CodeGen/X86/vec_fp_to_int.ll
  llvm/test/CodeGen/X86/vec_fpext.ll
  llvm/test/CodeGen/X86/vec_int_to_fp.ll
  llvm/test/CodeGen/X86/vec_logical.ll
  llvm/test/CodeGen/X86/vec_minmax_sint.ll
  llvm/test/CodeGen/X86/vec_minmax_uint.ll
  llvm/test/CodeGen/X86/vec_ss_load_fold.ll
  llvm/test/CodeGen/X86/vec_umulo.ll
  llvm/test/CodeGen/X86/vector-bitreverse.ll
  llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics-fma.ll
  llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
  llvm/test/CodeGen/X86/vector-extend-inreg.ll
  llvm/test/CodeGen/X86/vector-fshl-256.ll
  llvm/test/CodeGen/X86/vector-fshr-256.ll
  llvm/test/CodeGen/X86/vector-gep.ll
  llvm/test/CodeGen/X86/vector-half-conversions.ll
  llvm/test/CodeGen/X86/vector-lzcnt-128.ll
  llvm/test/CodeGen/X86/vector-lzcnt-256.ll
  llvm/test/CodeGen/X86/vector-partial-undef.ll
  llvm/test/CodeGen/X86/vector-popcnt-128.ll
  llvm/test/CodeGen/X86/vector-popcnt-256.ll
  llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
  llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
  llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
  llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
  llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
  llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
  llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
  llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
  llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
  llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll
  llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
  llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
  llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
  llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
  llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
  llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
  llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
  llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
  llvm/test/CodeGen/X86/vector-shuffle-combining.ll
  llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll
  llvm/test/CodeGen/X86/vector-shuffle-variable-256.ll
  (13 more files...)



More information about the llvm-commits mailing list