[all-commits] [llvm/llvm-project] 17eafe: [X86][SSE] lowerV2I64Shuffle - use undef elements ...

Sun Jul 26 08:04:59 PDT 2020

  Branch: refs/heads/master
  Home:   https://github.com/llvm/llvm-project
  Commit: 17eafe0841d6e523d410771c8d4de99d5881c59d
      https://github.com/llvm/llvm-project/commit/17eafe0841d6e523d410771c8d4de99d5881c59d
  Author: Simon Pilgrim <llvm-dev at redking.me.uk>
  Date:   2020-07-26 (Sun, 26 Jul 2020)

  Changed paths:
    M llvm/lib/Target/X86/X86ISelLowering.cpp
    M llvm/test/CodeGen/X86/avg.ll
    M llvm/test/CodeGen/X86/avx-cvt.ll
    M llvm/test/CodeGen/X86/avx512-hadd-hsub.ll
    M llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
    M llvm/test/CodeGen/X86/buildvec-extract.ll
    M llvm/test/CodeGen/X86/cast-vsel.ll
    M llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
    M llvm/test/CodeGen/X86/combine-movmsk-avx.ll
    M llvm/test/CodeGen/X86/combine-sdiv.ll
    M llvm/test/CodeGen/X86/combine-shl.ll
    M llvm/test/CodeGen/X86/combine-sra.ll
    M llvm/test/CodeGen/X86/combine-srl.ll
    M llvm/test/CodeGen/X86/combine-udiv.ll
    M llvm/test/CodeGen/X86/combine-urem.ll
    M llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
    M llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
    M llvm/test/CodeGen/X86/extract-store.ll
    M llvm/test/CodeGen/X86/extractelement-index.ll
    M llvm/test/CodeGen/X86/extractelement-load.ll
    M llvm/test/CodeGen/X86/gather-addresses.ll
    M llvm/test/CodeGen/X86/haddsub-2.ll
    M llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
    M llvm/test/CodeGen/X86/horizontal-reduce-add.ll
    M llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
    M llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
    M llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
    M llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
    M llvm/test/CodeGen/X86/i128-add.ll
    M llvm/test/CodeGen/X86/inline-asm-x-i128.ll
    M llvm/test/CodeGen/X86/known-bits-vector.ll
    M llvm/test/CodeGen/X86/known-signbits-vector.ll
    M llvm/test/CodeGen/X86/madd.ll
    M llvm/test/CodeGen/X86/masked_compressstore.ll
    M llvm/test/CodeGen/X86/masked_gather.ll
    M llvm/test/CodeGen/X86/masked_load.ll
    M llvm/test/CodeGen/X86/masked_store.ll
    M llvm/test/CodeGen/X86/masked_store_trunc.ll
    M llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
    M llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
    M llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll
    M llvm/test/CodeGen/X86/min-legal-vector-width.ll
    M llvm/test/CodeGen/X86/nontemporal-2.ll
    M llvm/test/CodeGen/X86/oddshuffles.ll
    M llvm/test/CodeGen/X86/phaddsub-extract.ll
    M llvm/test/CodeGen/X86/pmul.ll
    M llvm/test/CodeGen/X86/pmulh.ll
    M llvm/test/CodeGen/X86/pr15267.ll
    M llvm/test/CodeGen/X86/pr39733.ll
    M llvm/test/CodeGen/X86/pr42452.ll
    M llvm/test/CodeGen/X86/pr42905.ll
    M llvm/test/CodeGen/X86/pr44976.ll
    M llvm/test/CodeGen/X86/pr45378.ll
    M llvm/test/CodeGen/X86/pr46189.ll
    M llvm/test/CodeGen/X86/pr46455.ll
    M llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
    M llvm/test/CodeGen/X86/psubus.ll
    M llvm/test/CodeGen/X86/sad.ll
    M llvm/test/CodeGen/X86/sdiv_fix.ll
    M llvm/test/CodeGen/X86/sdiv_fix_sat.ll
    M llvm/test/CodeGen/X86/setcc-wide-types.ll
    M llvm/test/CodeGen/X86/shrink_vmul.ll
    M llvm/test/CodeGen/X86/slow-pmulld.ll
    M llvm/test/CodeGen/X86/smul_fix_sat.ll
    M llvm/test/CodeGen/X86/split-extend-vector-inreg.ll
    M llvm/test/CodeGen/X86/split-vector-rem.ll
    M llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
    M llvm/test/CodeGen/X86/sse41.ll
    M llvm/test/CodeGen/X86/trunc-subvector.ll
    M llvm/test/CodeGen/X86/udiv_fix.ll
    M llvm/test/CodeGen/X86/udiv_fix_sat.ll
    M llvm/test/CodeGen/X86/uint_to_fp-3.ll
    M llvm/test/CodeGen/X86/umul_fix_sat.ll
    M llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll
    M llvm/test/CodeGen/X86/var-permute-128.ll
    M llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
    M llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
    M llvm/test/CodeGen/X86/vec_cast2.ll
    M llvm/test/CodeGen/X86/vec_int_to_fp.ll
    M llvm/test/CodeGen/X86/vec_saddo.ll
    M llvm/test/CodeGen/X86/vec_smulo.ll
    M llvm/test/CodeGen/X86/vec_ssubo.ll
    M llvm/test/CodeGen/X86/vec_uaddo.ll
    M llvm/test/CodeGen/X86/vec_umulo.ll
    M llvm/test/CodeGen/X86/vec_usubo.ll
    M llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
    M llvm/test/CodeGen/X86/vector-fshl-128.ll
    M llvm/test/CodeGen/X86/vector-fshl-256.ll
    M llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
    M llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
    M llvm/test/CodeGen/X86/vector-fshr-128.ll
    M llvm/test/CodeGen/X86/vector-fshr-256.ll
    M llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
    M llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
    M llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
    M llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
    M llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
    M llvm/test/CodeGen/X86/vector-pcmp.ll
    M llvm/test/CodeGen/X86/vector-reduce-add.ll
    M llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll
    M llvm/test/CodeGen/X86/vector-reduce-and.ll
    M llvm/test/CodeGen/X86/vector-reduce-mul.ll
    M llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
    M llvm/test/CodeGen/X86/vector-reduce-or.ll
    M llvm/test/CodeGen/X86/vector-reduce-smax.ll
    M llvm/test/CodeGen/X86/vector-reduce-smin.ll
    M llvm/test/CodeGen/X86/vector-reduce-umax.ll
    M llvm/test/CodeGen/X86/vector-reduce-umin.ll
    M llvm/test/CodeGen/X86/vector-reduce-xor.ll
    M llvm/test/CodeGen/X86/vector-rem.ll
    M llvm/test/CodeGen/X86/vector-rotate-128.ll
    M llvm/test/CodeGen/X86/vector-rotate-256.ll
    M llvm/test/CodeGen/X86/vector-sext.ll
    M llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
    M llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
    M llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
    M llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
    M llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
    M llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
    M llvm/test/CodeGen/X86/vector-shift-shl-128.ll
    M llvm/test/CodeGen/X86/vector-shift-shl-256.ll
    M llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
    M llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
    M llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
    M llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
    M llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll
    M llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
    M llvm/test/CodeGen/X86/vector-shuffle-combining.ll
    M llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll
    M llvm/test/CodeGen/X86/vector-zext.ll
    M llvm/test/CodeGen/X86/vsel-cmp-load.ll
    M llvm/test/CodeGen/X86/vselect-avx.ll
    M llvm/test/CodeGen/X86/vselect-pcmp.ll
    M llvm/test/CodeGen/X86/vshift-4.ll
    M llvm/test/CodeGen/X86/widen_conv-4.ll
    M llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
    M llvm/test/CodeGen/X86/xor.ll

  Log Message:
  -----------
  [X86][SSE] lowerV2I64Shuffle - use undef elements in PSHUFD mask widening

If we lower a v2i64 shuffle to PSHUFD, we currently clamp undef elements to 0 (i.e. elements 0,1 of the v4i32), which can result in the shuffle referencing more elements of the source vector than expected, affecting later shuffle combines and KnownBits/SimplifyDemanded calls.

By ensuring we widen the undef mask element, we allow getV4X86ShuffleImm8 to use inline elements as the default, which are more likely to fold.