[PATCH] D125527: [CostModel][X86] getScalarizationOverhead - improve extraction costs for > 128-bit vectors

Simon Pilgrim via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri May 13 01:53:33 PDT 2022


RKSimon created this revision.
RKSimon added reviewers: pengfei, craig.topper, spatel, lebedev.ri, pgousseau.
Herald added subscribers: StephenFan, arphaman, hiraditya.
Herald added a project: All.
RKSimon requested review of this revision.
Herald added a project: LLVM.

We were using the default getScalarizationOverhead expansion for extraction costs, which adds up all the individual element extraction costs.

This is fine for 128-bit vectors, but for 256/512-bit vectors each element extraction also has to account for extracting the upper 128-bit subvector before it can handle the element. For scalarization costs we only need to extract each demanded subvector once.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D125527

Files:
  llvm/lib/Target/X86/X86TargetTransformInfo.cpp
  llvm/test/Analysis/CostModel/X86/arith-fp.ll
  llvm/test/Analysis/CostModel/X86/fptoi_sat.ll
  llvm/test/Analysis/CostModel/X86/fptosi.ll
  llvm/test/Analysis/CostModel/X86/fptoui.ll
  llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll
  llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll
  llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll
  llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll
  llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-3.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-5.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-6.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-7.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll
  llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll
  llvm/test/Analysis/CostModel/X86/masked-gather-i32-with-i8-index.ll
  llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll
  llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll
  llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll
  llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
  llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
  llvm/test/Analysis/CostModel/X86/masked-scatter-i32-with-i8-index.ll
  llvm/test/Analysis/CostModel/X86/masked-scatter-i64-with-i8-index.ll
  llvm/test/Analysis/CostModel/X86/masked-store-i16.ll
  llvm/test/Analysis/CostModel/X86/masked-store-i8.ll
  llvm/test/Analysis/CostModel/X86/reduce-fadd.ll
  llvm/test/Analysis/CostModel/X86/reduce-fmul.ll
  llvm/test/Analysis/CostModel/X86/scatter-i16-with-i8-index.ll
  llvm/test/Analysis/CostModel/X86/scatter-i32-with-i8-index.ll
  llvm/test/Analysis/CostModel/X86/scatter-i64-with-i8-index.ll
  llvm/test/Analysis/CostModel/X86/scatter-i8-with-i8-index.ll
  llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll
  llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll
  llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll
  llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll
  llvm/test/Analysis/CostModel/X86/sitofp.ll
  llvm/test/Analysis/CostModel/X86/trunc.ll



More information about the llvm-commits mailing list