[all-commits] [llvm/llvm-project] 17eafe: [X86][SSE] lowerV2I64Shuffle - use undef elements ...

Simon Pilgrim via All-commits all-commits at lists.llvm.org
Sun Jul 26 08:04:59 PDT 2020


  Branch: refs/heads/master
  Home:   https://github.com/llvm/llvm-project
  Commit: 17eafe0841d6e523d410771c8d4de99d5881c59d
      https://github.com/llvm/llvm-project/commit/17eafe0841d6e523d410771c8d4de99d5881c59d
  Author: Simon Pilgrim <llvm-dev at redking.me.uk>
  Date:   2020-07-26 (Sun, 26 Jul 2020)

  Changed paths:
    M llvm/lib/Target/X86/X86ISelLowering.cpp
    M llvm/test/CodeGen/X86/avg.ll
    M llvm/test/CodeGen/X86/avx-cvt.ll
    M llvm/test/CodeGen/X86/avx512-hadd-hsub.ll
    M llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
    M llvm/test/CodeGen/X86/buildvec-extract.ll
    M llvm/test/CodeGen/X86/cast-vsel.ll
    M llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
    M llvm/test/CodeGen/X86/combine-movmsk-avx.ll
    M llvm/test/CodeGen/X86/combine-sdiv.ll
    M llvm/test/CodeGen/X86/combine-shl.ll
    M llvm/test/CodeGen/X86/combine-sra.ll
    M llvm/test/CodeGen/X86/combine-srl.ll
    M llvm/test/CodeGen/X86/combine-udiv.ll
    M llvm/test/CodeGen/X86/combine-urem.ll
    M llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
    M llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
    M llvm/test/CodeGen/X86/extract-store.ll
    M llvm/test/CodeGen/X86/extractelement-index.ll
    M llvm/test/CodeGen/X86/extractelement-load.ll
    M llvm/test/CodeGen/X86/gather-addresses.ll
    M llvm/test/CodeGen/X86/haddsub-2.ll
    M llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
    M llvm/test/CodeGen/X86/horizontal-reduce-add.ll
    M llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
    M llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
    M llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
    M llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
    M llvm/test/CodeGen/X86/i128-add.ll
    M llvm/test/CodeGen/X86/inline-asm-x-i128.ll
    M llvm/test/CodeGen/X86/known-bits-vector.ll
    M llvm/test/CodeGen/X86/known-signbits-vector.ll
    M llvm/test/CodeGen/X86/madd.ll
    M llvm/test/CodeGen/X86/masked_compressstore.ll
    M llvm/test/CodeGen/X86/masked_gather.ll
    M llvm/test/CodeGen/X86/masked_load.ll
    M llvm/test/CodeGen/X86/masked_store.ll
    M llvm/test/CodeGen/X86/masked_store_trunc.ll
    M llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
    M llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
    M llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll
    M llvm/test/CodeGen/X86/min-legal-vector-width.ll
    M llvm/test/CodeGen/X86/nontemporal-2.ll
    M llvm/test/CodeGen/X86/oddshuffles.ll
    M llvm/test/CodeGen/X86/phaddsub-extract.ll
    M llvm/test/CodeGen/X86/pmul.ll
    M llvm/test/CodeGen/X86/pmulh.ll
    M llvm/test/CodeGen/X86/pr15267.ll
    M llvm/test/CodeGen/X86/pr39733.ll
    M llvm/test/CodeGen/X86/pr42452.ll
    M llvm/test/CodeGen/X86/pr42905.ll
    M llvm/test/CodeGen/X86/pr44976.ll
    M llvm/test/CodeGen/X86/pr45378.ll
    M llvm/test/CodeGen/X86/pr46189.ll
    M llvm/test/CodeGen/X86/pr46455.ll
    M llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
    M llvm/test/CodeGen/X86/psubus.ll
    M llvm/test/CodeGen/X86/sad.ll
    M llvm/test/CodeGen/X86/sdiv_fix.ll
    M llvm/test/CodeGen/X86/sdiv_fix_sat.ll
    M llvm/test/CodeGen/X86/setcc-wide-types.ll
    M llvm/test/CodeGen/X86/shrink_vmul.ll
    M llvm/test/CodeGen/X86/slow-pmulld.ll
    M llvm/test/CodeGen/X86/smul_fix_sat.ll
    M llvm/test/CodeGen/X86/split-extend-vector-inreg.ll
    M llvm/test/CodeGen/X86/split-vector-rem.ll
    M llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
    M llvm/test/CodeGen/X86/sse41.ll
    M llvm/test/CodeGen/X86/trunc-subvector.ll
    M llvm/test/CodeGen/X86/udiv_fix.ll
    M llvm/test/CodeGen/X86/udiv_fix_sat.ll
    M llvm/test/CodeGen/X86/uint_to_fp-3.ll
    M llvm/test/CodeGen/X86/umul_fix_sat.ll
    M llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll
    M llvm/test/CodeGen/X86/var-permute-128.ll
    M llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
    M llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
    M llvm/test/CodeGen/X86/vec_cast2.ll
    M llvm/test/CodeGen/X86/vec_int_to_fp.ll
    M llvm/test/CodeGen/X86/vec_saddo.ll
    M llvm/test/CodeGen/X86/vec_smulo.ll
    M llvm/test/CodeGen/X86/vec_ssubo.ll
    M llvm/test/CodeGen/X86/vec_uaddo.ll
    M llvm/test/CodeGen/X86/vec_umulo.ll
    M llvm/test/CodeGen/X86/vec_usubo.ll
    M llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
    M llvm/test/CodeGen/X86/vector-fshl-128.ll
    M llvm/test/CodeGen/X86/vector-fshl-256.ll
    M llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
    M llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
    M llvm/test/CodeGen/X86/vector-fshr-128.ll
    M llvm/test/CodeGen/X86/vector-fshr-256.ll
    M llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
    M llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
    M llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
    M llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
    M llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
    M llvm/test/CodeGen/X86/vector-pcmp.ll
    M llvm/test/CodeGen/X86/vector-reduce-add.ll
    M llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll
    M llvm/test/CodeGen/X86/vector-reduce-and.ll
    M llvm/test/CodeGen/X86/vector-reduce-mul.ll
    M llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
    M llvm/test/CodeGen/X86/vector-reduce-or.ll
    M llvm/test/CodeGen/X86/vector-reduce-smax.ll
    M llvm/test/CodeGen/X86/vector-reduce-smin.ll
    M llvm/test/CodeGen/X86/vector-reduce-umax.ll
    M llvm/test/CodeGen/X86/vector-reduce-umin.ll
    M llvm/test/CodeGen/X86/vector-reduce-xor.ll
    M llvm/test/CodeGen/X86/vector-rem.ll
    M llvm/test/CodeGen/X86/vector-rotate-128.ll
    M llvm/test/CodeGen/X86/vector-rotate-256.ll
    M llvm/test/CodeGen/X86/vector-sext.ll
    M llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
    M llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
    M llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
    M llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
    M llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
    M llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
    M llvm/test/CodeGen/X86/vector-shift-shl-128.ll
    M llvm/test/CodeGen/X86/vector-shift-shl-256.ll
    M llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
    M llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
    M llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
    M llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
    M llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll
    M llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
    M llvm/test/CodeGen/X86/vector-shuffle-combining.ll
    M llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll
    M llvm/test/CodeGen/X86/vector-zext.ll
    M llvm/test/CodeGen/X86/vsel-cmp-load.ll
    M llvm/test/CodeGen/X86/vselect-avx.ll
    M llvm/test/CodeGen/X86/vselect-pcmp.ll
    M llvm/test/CodeGen/X86/vshift-4.ll
    M llvm/test/CodeGen/X86/widen_conv-4.ll
    M llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
    M llvm/test/CodeGen/X86/xor.ll

  Log Message:
  -----------
  [X86][SSE] lowerV2I64Shuffle - use undef elements in PSHUFD mask widening

If we lower a v2i64 shuffle to PSHUFD, we currently clamp undef elements to 0, (elements 0,1 of the v4i32) which can result in the shuffle referencing more elements of the source vector than expected, affecting later shuffle combines and KnownBits/SimplifyDemanded calls.

By ensuring we widen the undef mask element we allow getV4X86ShuffleImm8 to use inline elements as the default, which are more likely to fold.




More information about the All-commits mailing list