[all-commits] [llvm/llvm-project] 6c8026: [CostModel][X86] getScalarizationOverhead - improve extraction costs for > 128-bit vectors

Simon Pilgrim via All-commits all-commits at lists.llvm.org
Tue May 24 07:18:30 PDT 2022


  Branch: refs/heads/main
  Home:   https://github.com/llvm/llvm-project
  Commit: 6c80267d0ff445c0c47c6ddb283da5a8bc4feb64
      https://github.com/llvm/llvm-project/commit/6c80267d0ff445c0c47c6ddb283da5a8bc4feb64
  Author: Simon Pilgrim <llvm-dev at redking.me.uk>
  Date:   2022-05-24 (Tue, 24 May 2022)

  Changed paths:
    M llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    M llvm/test/Analysis/CostModel/X86/arith-fp.ll
    M llvm/test/Analysis/CostModel/X86/fptoi_sat.ll
    M llvm/test/Analysis/CostModel/X86/fptosi.ll
    M llvm/test/Analysis/CostModel/X86/fptoui.ll
    M llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll
    M llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll
    M llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll
    M llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-3.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-5.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-6.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-7.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll
    M llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll
    M llvm/test/Analysis/CostModel/X86/masked-gather-i32-with-i8-index.ll
    M llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll
    M llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll
    M llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll
    M llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
    M llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
    M llvm/test/Analysis/CostModel/X86/masked-scatter-i32-with-i8-index.ll
    M llvm/test/Analysis/CostModel/X86/masked-scatter-i64-with-i8-index.ll
    M llvm/test/Analysis/CostModel/X86/masked-store-i16.ll
    M llvm/test/Analysis/CostModel/X86/masked-store-i8.ll
    M llvm/test/Analysis/CostModel/X86/reduce-fadd.ll
    M llvm/test/Analysis/CostModel/X86/reduce-fmul.ll
    M llvm/test/Analysis/CostModel/X86/scatter-i16-with-i8-index.ll
    M llvm/test/Analysis/CostModel/X86/scatter-i32-with-i8-index.ll
    M llvm/test/Analysis/CostModel/X86/scatter-i64-with-i8-index.ll
    M llvm/test/Analysis/CostModel/X86/scatter-i8-with-i8-index.ll
    M llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll
    M llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll
    M llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll
    M llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll
    M llvm/test/Analysis/CostModel/X86/sitofp.ll
    M llvm/test/Analysis/CostModel/X86/trunc.ll

  Log Message:
  -----------
  [CostModel][X86] getScalarizationOverhead - improve extraction costs for > 128-bit vectors

We were using the default getScalarizationOverhead expansion for extraction costs, which adds up all the individual element extraction costs.

This is fine for 128-bit vectors, but for 256/512-bit vectors each element extraction cost also has to account for extracting the upper 128-bit subvector before the element itself can be extracted. For scalarization costs we only need to extract each demanded subvector once.

Differential Revision: https://reviews.llvm.org/D125527




More information about the All-commits mailing list