[all-commits] [llvm/llvm-project] 8b43c1: [X86] X86FixupVectorConstants - shrink vector load...

Simon Pilgrim via All-commits all-commits at lists.llvm.org
Wed Jan 24 06:01:04 PST 2024


  Branch: refs/heads/main
  Home:   https://github.com/llvm/llvm-project
  Commit: 8b43c1be23119c1024bed0a8ce392bc73727e2e2
      https://github.com/llvm/llvm-project/commit/8b43c1be23119c1024bed0a8ce392bc73727e2e2
  Author: Simon Pilgrim <RKSimon at users.noreply.github.com>
  Date:   2024-01-24 (Wed, 24 Jan 2024)

  Changed paths:
    M llvm/lib/Target/X86/X86FixupVectorConstants.cpp
    M llvm/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll
    M llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
    M llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
    M llvm/test/CodeGen/X86/avx-load-store.ll
    M llvm/test/CodeGen/X86/avx2-arith.ll
    M llvm/test/CodeGen/X86/avx512-shuffles/shuffle-chained-bf16.ll
    M llvm/test/CodeGen/X86/bitreverse.ll
    M llvm/test/CodeGen/X86/combine-srl.ll
    M llvm/test/CodeGen/X86/combine-subo.ll
    M llvm/test/CodeGen/X86/constant-pool-sharing.ll
    M llvm/test/CodeGen/X86/dpbusd.ll
    M llvm/test/CodeGen/X86/dpbusd_const.ll
    M llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll
    M llvm/test/CodeGen/X86/fcmp-constant.ll
    M llvm/test/CodeGen/X86/fold-vector-sext-zext.ll
    M llvm/test/CodeGen/X86/half.ll
    M llvm/test/CodeGen/X86/icmp-pow2-mask.ll
    M llvm/test/CodeGen/X86/insert-into-constant-vector.ll
    M llvm/test/CodeGen/X86/insertelement-ones.ll
    M llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
    M llvm/test/CodeGen/X86/masked_store_trunc.ll
    M llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
    M llvm/test/CodeGen/X86/memcmp.ll
    M llvm/test/CodeGen/X86/pmaddubsw.ll
    M llvm/test/CodeGen/X86/pr46532.ll
    M llvm/test/CodeGen/X86/pr63108.ll
    M llvm/test/CodeGen/X86/pr74736.ll
    M llvm/test/CodeGen/X86/prefer-avx256-lzcnt.ll
    M llvm/test/CodeGen/X86/pshufb-mask-comments.ll
    M llvm/test/CodeGen/X86/ret-mmx.ll
    M llvm/test/CodeGen/X86/sad.ll
    M llvm/test/CodeGen/X86/sext-vsetcc.ll
    M llvm/test/CodeGen/X86/shrink_vmul.ll
    M llvm/test/CodeGen/X86/shuffle-half.ll
    M llvm/test/CodeGen/X86/shuffle-strided-with-offset-256.ll
    M llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
    M llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
    M llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
    M llvm/test/CodeGen/X86/vec_anyext.ll
    M llvm/test/CodeGen/X86/vec_fp_to_int.ll
    M llvm/test/CodeGen/X86/vec_set-A.ll
    M llvm/test/CodeGen/X86/vector-blend.ll
    M llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
    M llvm/test/CodeGen/X86/vector-fshl-128.ll
    M llvm/test/CodeGen/X86/vector-fshl-256.ll
    M llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
    M llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
    M llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
    M llvm/test/CodeGen/X86/vector-fshl-sub128.ll
    M llvm/test/CodeGen/X86/vector-fshr-128.ll
    M llvm/test/CodeGen/X86/vector-fshr-256.ll
    M llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
    M llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
    M llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
    M llvm/test/CodeGen/X86/vector-fshr-sub128.ll
    M llvm/test/CodeGen/X86/vector-half-conversions.ll
    M llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-3.ll
    M llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-5.ll
    M llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-6.ll
    M llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-7.ll
    M llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-8.ll
    M llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-3.ll
    M llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-4.ll
    M llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll
    M llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll
    M llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-8.ll
    M llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-2.ll
    M llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll
    M llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-5.ll
    M llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-6.ll
    M llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-7.ll
    M llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-8.ll
    M llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-5.ll
    M llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll
    M llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll
    M llvm/test/CodeGen/X86/vector-lzcnt-128.ll
    M llvm/test/CodeGen/X86/vector-lzcnt-256.ll
    M llvm/test/CodeGen/X86/vector-mulfix-legalize.ll
    M llvm/test/CodeGen/X86/vector-reduce-add-mask.ll
    M llvm/test/CodeGen/X86/vector-rotate-128.ll
    M llvm/test/CodeGen/X86/vector-rotate-256.ll
    M llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
    M llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
    M llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll
    M llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
    M llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
    M llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
    M llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
    M llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll
    M llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
    M llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
    M llvm/test/CodeGen/X86/vector-shuffle-combining.ll
    M llvm/test/CodeGen/X86/vector-shuffle-v1.ll
    M llvm/test/CodeGen/X86/vector-trunc.ll
    M llvm/test/CodeGen/X86/vector-tzcnt-128.ll
    M llvm/test/CodeGen/X86/vselect-constants.ll
    M llvm/test/CodeGen/X86/vselect-post-combine.ll
    M llvm/test/CodeGen/X86/widen_bitcnt.ll
    M llvm/test/CodeGen/X86/x86-interleaved-access.ll
    M llvm/test/CodeGen/X86/zero_extend_vector_inreg.ll
    M llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
    M llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll

  Log Message:
  -----------
  [X86] X86FixupVectorConstants - shrink vector load to movsd/movss/movd/movq 'zero upper' instructions (#79000)

If we're loading a vector constant that is known to be zero in the upper elements, attempt to shrink the constant and perform a scalar load of just the lower 32/64 bits.

Always choose the vzload/broadcast with the smallest constant load, and prefer vzload over broadcast for the same bitwidth to avoid domain flips (mainly an AVX1 issue).

Fixes #73783
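
Purely as an illustration of the kind of constant this targets (not code from the commit, and the function and values below are hypothetical), here is a small C sketch: the 128-bit constant is zero in its upper two elements, so only the low 64 bits of the constant-pool entry are meaningful and the backend can materialize it with a scalar 'zero upper' load such as movq of an 8-byte entry instead of a full 16-byte vector load. The exact instruction chosen depends on the subtarget.

    /* Hypothetical example: a 128-bit constant whose upper 64 bits are
       all zero. Before this change the backend would load the full
       16-byte constant; afterwards it can use a zero-extending scalar
       load of just the low 8 bytes, shrinking the constant pool entry. */
    #include <immintrin.h>
    #include <stdio.h>

    static __m128i add_low_constant(__m128i x) {
      /* Constant {1, 2, 0, 0}: elements 2 and 3 are zero, so only the
         low 64 bits of the constant need to be loaded. */
      const __m128i c = _mm_set_epi32(0, 0, 2, 1);
      return _mm_add_epi32(x, c);
    }

    int main(void) {
      __m128i v = _mm_set_epi32(0, 0, 40, 41);
      __m128i r = add_low_constant(v);
      int out[4];
      _mm_storeu_si128((__m128i *)out, r);
      printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);
      return 0;
    }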
