[PATCH] D133902: X86: Stop assigning register costs for longer encodings

Matthias Braun via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 14 15:32:56 PDT 2022


MatzeB created this revision.
MatzeB added reviewers: RKSimon, craig.topper, qcolombet, mtrofin, arsenm, reames, wmi.
Herald added subscribers: lebedev.ri, armkevincheng, eric-k256, StephenFan, modimo, wenlei, pengfei, asbirlea, arphaman, javed.absar, mcrosier.
Herald added a reviewer: sjarus.
Herald added a reviewer: lebedev.ri.
Herald added a project: All.
MatzeB requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.

This stops reporting CostPerUse 1 for R8-R15 and XMM8-XMM31. This was
previously done because the instruction encoding requires a REX prefix when
using them resulting in longer instruction encodings. I found that this
regresses the quality of the register allocation as the costs impose an
ordering on eviction candidates. I also feel that there is a bit of an
impedance mismatch as the actual costs occur when encoding instructions
using those registers, but the order of VReg assignments is not
primarily ordered by number of Defs+Uses.

I did extensive measurements with the llvm-test-suite with SPEC2006 +
SPEC2017 included, internal services showed similar patterns. Generally
there are a lot of improvements but also a lot of regressions. But on
average the allocation quality seems to improve at a small code size
regression.

Results for measuring static and dynamic instruction counts:

O3 + ThinLTO + Instr-PGO
------------------------

Dynamic Counts (scaled by execution frequency) / Optimization Remarks:

  Spills+FoldedSpills   -5.6%
  Reloads+FoldedReloads -4.2%
  Copies                -0.1%

Static / LLVM Statistics:

  regalloc.NumSpills    mean -1.6%, geomean -2.8%
  regalloc.NumReloads   mean -1.7%, geomean -3.1%
  size..text            mean +0.4%, geomean +0.4%

O3
--

Static / LLVM Statistics:

  regalloc.NumSpills    mean -2.2%, geomean -3.1%
  regalloc.NumReloads   mean -2.6%, geomean -3.9%
  size..text            mean +0.6%, geomean +0.6%

Os
--

Static / LLVM Statistics:

  regalloc.NumSpills   mean -3.0%
  regalloc.NumReloads  mean -3.3%
  size..text           mean +0.3%, geomean +0.3%


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D133902

Files:
  llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
  llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
  llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
  llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
  llvm/test/CodeGen/X86/2009-05-30-ISelBug.ll
  llvm/test/CodeGen/X86/AMX/amx-across-func.ll
  llvm/test/CodeGen/X86/AMX/amx-greedy-ra-spill-shape.ll
  llvm/test/CodeGen/X86/AMX/amx-intrinsic-chain.ll
  llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll
  llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll
  llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll
  llvm/test/CodeGen/X86/AMX/amx-spill.ll
  llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
  llvm/test/CodeGen/X86/StackColoring.ll
  llvm/test/CodeGen/X86/add-and-not.ll
  llvm/test/CodeGen/X86/addcarry.ll
  llvm/test/CodeGen/X86/avg.ll
  llvm/test/CodeGen/X86/avoid-sfb.ll
  llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
  llvm/test/CodeGen/X86/avx-load-store.ll
  llvm/test/CodeGen/X86/avx512-calling-conv.ll
  llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
  llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
  llvm/test/CodeGen/X86/bfloat.ll
  llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll
  llvm/test/CodeGen/X86/bitreverse.ll
  llvm/test/CodeGen/X86/break-false-dep.ll
  llvm/test/CodeGen/X86/bswap.ll
  llvm/test/CodeGen/X86/callbr-asm-blockplacement.ll
  llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll
  llvm/test/CodeGen/X86/callbr-asm-phi-placement.ll
  llvm/test/CodeGen/X86/cgp-usubo.ll
  llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
  llvm/test/CodeGen/X86/combine-pmuldq.ll
  llvm/test/CodeGen/X86/combine-sdiv.ll
  llvm/test/CodeGen/X86/commute-fcmp.ll
  llvm/test/CodeGen/X86/compact-unwind.ll
  llvm/test/CodeGen/X86/conditional-tailcall.ll
  llvm/test/CodeGen/X86/copy-eflags.ll
  llvm/test/CodeGen/X86/ctpop-combine.ll
  llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
  llvm/test/CodeGen/X86/dagcombine-cse.ll
  llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
  llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
  llvm/test/CodeGen/X86/divmod128.ll
  llvm/test/CodeGen/X86/extract-bits.ll
  llvm/test/CodeGen/X86/flt-rounds.ll
  llvm/test/CodeGen/X86/fma-commute-loop.ll
  llvm/test/CodeGen/X86/fmaddsub-combine.ll
  llvm/test/CodeGen/X86/fmaxnum.ll
  llvm/test/CodeGen/X86/fminnum.ll
  llvm/test/CodeGen/X86/fp-stack-2results.ll
  llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
  llvm/test/CodeGen/X86/fp128-select.ll
  llvm/test/CodeGen/X86/fpclamptosat_vec.ll
  llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
  llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
  llvm/test/CodeGen/X86/gather-addresses.ll
  llvm/test/CodeGen/X86/h-registers-1.ll
  llvm/test/CodeGen/X86/haddsub-2.ll
  llvm/test/CodeGen/X86/haddsub-4.ll
  llvm/test/CodeGen/X86/hoist-invariant-load.ll
  llvm/test/CodeGen/X86/i128-mul.ll
  llvm/test/CodeGen/X86/load-local-v3i1.ll
  llvm/test/CodeGen/X86/lrshrink.ll
  llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll
  llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
  llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
  llvm/test/CodeGen/X86/machine-cp.ll
  llvm/test/CodeGen/X86/madd.ll
  llvm/test/CodeGen/X86/masked-iv-unsafe.ll
  llvm/test/CodeGen/X86/masked_compressstore.ll
  llvm/test/CodeGen/X86/masked_expandload.ll
  llvm/test/CodeGen/X86/masked_gather.ll
  llvm/test/CodeGen/X86/masked_load.ll
  llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
  llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
  llvm/test/CodeGen/X86/midpoint-int-vec-256.ll
  llvm/test/CodeGen/X86/misched-matmul.ll
  llvm/test/CodeGen/X86/mmx-arith.ll
  llvm/test/CodeGen/X86/mul-constant-result.ll
  llvm/test/CodeGen/X86/mul-i1024.ll
  llvm/test/CodeGen/X86/mul-i256.ll
  llvm/test/CodeGen/X86/mul-i512.ll
  llvm/test/CodeGen/X86/muloti.ll
  llvm/test/CodeGen/X86/musttail-varargs.ll
  llvm/test/CodeGen/X86/nontemporal-loads.ll
  llvm/test/CodeGen/X86/oddshuffles.ll
  llvm/test/CodeGen/X86/or-address.ll
  llvm/test/CodeGen/X86/paddus.ll
  llvm/test/CodeGen/X86/pmul.ll
  llvm/test/CodeGen/X86/pmulh.ll
  llvm/test/CodeGen/X86/popcnt.ll
  llvm/test/CodeGen/X86/pr18344.ll
  llvm/test/CodeGen/X86/pr21792.ll
  llvm/test/CodeGen/X86/pr23603.ll
  llvm/test/CodeGen/X86/pr29112.ll
  llvm/test/CodeGen/X86/pr32329.ll
  llvm/test/CodeGen/X86/pr35316.ll
  llvm/test/CodeGen/X86/pr38185.ll
  llvm/test/CodeGen/X86/pr38217.ll
  llvm/test/CodeGen/X86/pr43820.ll
  llvm/test/CodeGen/X86/pr45563-2.ll
  llvm/test/CodeGen/X86/pr45563.ll
  llvm/test/CodeGen/X86/pr45995.ll
  llvm/test/CodeGen/X86/pr46877.ll
  llvm/test/CodeGen/X86/pr47299.ll
  llvm/test/CodeGen/X86/pr47857.ll
  llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll
  llvm/test/CodeGen/X86/promote-cmp.ll
  llvm/test/CodeGen/X86/psubus.ll
  llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
  llvm/test/CodeGen/X86/reverse_branches.ll
  llvm/test/CodeGen/X86/sad.ll
  llvm/test/CodeGen/X86/sadd_sat_vec.ll
  llvm/test/CodeGen/X86/sbb-false-dep.ll
  llvm/test/CodeGen/X86/scalar_widen_div.ll
  llvm/test/CodeGen/X86/scheduler-backtracking.ll
  llvm/test/CodeGen/X86/sdiv_fix.ll
  llvm/test/CodeGen/X86/sdiv_fix_sat.ll
  llvm/test/CodeGen/X86/setcc-wide-types.ll
  llvm/test/CodeGen/X86/shift-i128.ll
  llvm/test/CodeGen/X86/shrink_vmul.ll
  llvm/test/CodeGen/X86/smul-with-overflow.ll
  llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll
  llvm/test/CodeGen/X86/speculative-load-hardening-call-and-ret.ll
  llvm/test/CodeGen/X86/speculative-load-hardening.ll
  llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
  llvm/test/CodeGen/X86/sse-intel-ocl.ll
  llvm/test/CodeGen/X86/sse-regcall.ll
  llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
  llvm/test/CodeGen/X86/sshl_sat.ll
  llvm/test/CodeGen/X86/ssub_sat_vec.ll
  llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir
  llvm/test/CodeGen/X86/statepoint-invoke-ra-inline-spiller.mir
  llvm/test/CodeGen/X86/statepoint-invoke-ra-remove-back-copies.mir
  llvm/test/CodeGen/X86/statepoint-live-in-remat.ll
  llvm/test/CodeGen/X86/statepoint-live-in.ll
  llvm/test/CodeGen/X86/statepoint-ra-no-ls.ll
  llvm/test/CodeGen/X86/statepoint-regs.ll
  llvm/test/CodeGen/X86/statepoint-spill-slot-size-promotion.ll
  llvm/test/CodeGen/X86/statepoint-stack-usage.ll
  llvm/test/CodeGen/X86/statepoint-vreg-details.ll
  llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll
  llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll
  llvm/test/CodeGen/X86/statepoint-vreg.ll
  llvm/test/CodeGen/X86/statepoint-vreg.mir
  llvm/test/CodeGen/X86/subcarry.ll
  llvm/test/CodeGen/X86/swifterror.ll
  llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
  llvm/test/CodeGen/X86/tail-opts.ll
  llvm/test/CodeGen/X86/tailcallstack64.ll
  llvm/test/CodeGen/X86/tailccstack64.ll
  llvm/test/CodeGen/X86/twoaddr-lea.ll
  llvm/test/CodeGen/X86/uadd_sat_vec.ll
  llvm/test/CodeGen/X86/udiv_fix_sat.ll
  llvm/test/CodeGen/X86/umul-with-overflow.ll
  llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll
  llvm/test/CodeGen/X86/usub_sat_vec.ll
  llvm/test/CodeGen/X86/var-permute-128.ll
  llvm/test/CodeGen/X86/var-permute-512.ll
  llvm/test/CodeGen/X86/vec_int_to_fp.ll
  llvm/test/CodeGen/X86/vec_saddo.ll
  llvm/test/CodeGen/X86/vec_smulo.ll
  llvm/test/CodeGen/X86/vec_ssubo.ll
  llvm/test/CodeGen/X86/vec_uaddo.ll
  llvm/test/CodeGen/X86/vec_umulo.ll
  llvm/test/CodeGen/X86/vec_usubo.ll
  llvm/test/CodeGen/X86/vector-bitreverse.ll
  llvm/test/CodeGen/X86/vector-compare-results.ll
  llvm/test/CodeGen/X86/vector-fshl-128.ll
  llvm/test/CodeGen/X86/vector-fshl-256.ll
  llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
  llvm/test/CodeGen/X86/vector-fshr-128.ll
  llvm/test/CodeGen/X86/vector-fshr-256.ll
  llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
  llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
  llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
  llvm/test/CodeGen/X86/vector-interleave.ll
  llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-2.ll
  llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-3.ll
  llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-4.ll
  llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-5.ll
  llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-6.ll
  llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-2.ll
  llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-3.ll
  llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-4.ll
  llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll
  llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-2.ll
  llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-3.ll
  llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-4.ll
  llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-6.ll
  llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-3.ll
  llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll
  llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-6.ll
  llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-2.ll
  llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-3.ll
  llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-4.ll
  llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-5.ll
  llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-6.ll
  llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-2.ll
  llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-3.ll
  llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-4.ll
  llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-6.ll
  llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-2.ll
  llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-3.ll
  llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-4.ll
  llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-6.ll
  llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-3.ll
  llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-4.ll
  llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-6.ll
  llvm/test/CodeGen/X86/vector-mulfix-legalize.ll
  llvm/test/CodeGen/X86/vector-reduce-add-sext.ll
  llvm/test/CodeGen/X86/vector-reduce-fmax.ll
  llvm/test/CodeGen/X86/vector-reduce-fmin.ll
  llvm/test/CodeGen/X86/vector-reduce-mul.ll
  llvm/test/CodeGen/X86/vector-reduce-umax.ll
  llvm/test/CodeGen/X86/vector-reduce-umin.ll
  llvm/test/CodeGen/X86/vector-rotate-256.ll
  llvm/test/CodeGen/X86/vector-shift-by-select-loop.ll
  llvm/test/CodeGen/X86/vector-shuffle-v192.ll
  llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll
  llvm/test/CodeGen/X86/vector-shuffle-variable-256.ll
  llvm/test/CodeGen/X86/vector-trunc-math.ll
  llvm/test/CodeGen/X86/vector-trunc-packus.ll
  llvm/test/CodeGen/X86/vector-trunc-ssat.ll
  llvm/test/CodeGen/X86/vector-trunc-usat.ll
  llvm/test/CodeGen/X86/vector-zext.ll
  llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
  llvm/test/CodeGen/X86/vselect-minmax.ll
  llvm/test/CodeGen/X86/vselect-packss.ll
  llvm/test/CodeGen/X86/x86-cmov-converter.ll
  llvm/test/CodeGen/X86/x86-interleaved-access.ll
  llvm/test/CodeGen/X86/znver3-gather.ll



More information about the llvm-commits mailing list