[all-commits] [llvm/llvm-project] da067e: [AMDGPU] Set most sched model resource's BufferSiz...
Austin Kerbow via All-commits
all-commits at lists.llvm.org
Wed Dec 1 22:34:52 PST 2021
Branch: refs/heads/main
Home: https://github.com/llvm/llvm-project
Commit: da067ed569e0525d55c6ec6242f8cd5a73dd9e6f
https://github.com/llvm/llvm-project/commit/da067ed569e0525d55c6ec6242f8cd5a73dd9e6f
Author: Austin Kerbow <Austin.Kerbow at amd.com>
Date: 2021-12-01 (Wed, 01 Dec 2021)
Changed paths:
M llvm/lib/Target/AMDGPU/SISchedule.td
M llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll
M llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
M llvm/test/CodeGen/AMDGPU/add.v2i16.ll
M llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
M llvm/test/CodeGen/AMDGPU/and.ll
M llvm/test/CodeGen/AMDGPU/anyext.ll
M llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
M llvm/test/CodeGen/AMDGPU/bitreverse.ll
M llvm/test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx10.ll
M llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
M llvm/test/CodeGen/AMDGPU/bswap.ll
M llvm/test/CodeGen/AMDGPU/build_vector.ll
M llvm/test/CodeGen/AMDGPU/call-argument-types.ll
M llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
M llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
M llvm/test/CodeGen/AMDGPU/code-object-v3.ll
M llvm/test/CodeGen/AMDGPU/commute-shifts.ll
M llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
M llvm/test/CodeGen/AMDGPU/copy_to_scc.ll
M llvm/test/CodeGen/AMDGPU/ctlz.ll
M llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
M llvm/test/CodeGen/AMDGPU/ctpop16.ll
M llvm/test/CodeGen/AMDGPU/ctpop64.ll
M llvm/test/CodeGen/AMDGPU/cttz.ll
M llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
M llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
M llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll
M llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir
M llvm/test/CodeGen/AMDGPU/ds-alignment.ll
M llvm/test/CodeGen/AMDGPU/ds_read2.ll
M llvm/test/CodeGen/AMDGPU/ds_write2.ll
M llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
M llvm/test/CodeGen/AMDGPU/extract_vector_elt-i8.ll
M llvm/test/CodeGen/AMDGPU/fabs.ll
M llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll
M llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll
M llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll
M llvm/test/CodeGen/AMDGPU/fmin_legacy.f64.ll
M llvm/test/CodeGen/AMDGPU/fminnum.f64.ll
M llvm/test/CodeGen/AMDGPU/fp-min-max-atomics.ll
M llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
M llvm/test/CodeGen/AMDGPU/fp_to_sint.ll
M llvm/test/CodeGen/AMDGPU/fp_to_uint.ll
M llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
M llvm/test/CodeGen/AMDGPU/fptoui.f16.ll
M llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
M llvm/test/CodeGen/AMDGPU/fshl.ll
M llvm/test/CodeGen/AMDGPU/fshr.ll
M llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll
M llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
M llvm/test/CodeGen/AMDGPU/half.ll
M llvm/test/CodeGen/AMDGPU/idot2.ll
M llvm/test/CodeGen/AMDGPU/idot4s.ll
M llvm/test/CodeGen/AMDGPU/idot4u.ll
M llvm/test/CodeGen/AMDGPU/idot8s.ll
M llvm/test/CodeGen/AMDGPU/idot8u.ll
M llvm/test/CodeGen/AMDGPU/imm.ll
M llvm/test/CodeGen/AMDGPU/imm16.ll
M llvm/test/CodeGen/AMDGPU/immv216.ll
M llvm/test/CodeGen/AMDGPU/insert-subvector-unused-scratch.ll
M llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
M llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
M llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
M llvm/test/CodeGen/AMDGPU/kernel-args.ll
M llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll
M llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll
M llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll
M llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll
M llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
M llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
M llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
M llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
M llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll
M llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
M llvm/test/CodeGen/AMDGPU/load-global-i16.ll
M llvm/test/CodeGen/AMDGPU/load-hi16.ll
M llvm/test/CodeGen/AMDGPU/load-lo16.ll
M llvm/test/CodeGen/AMDGPU/load-local.128.ll
M llvm/test/CodeGen/AMDGPU/load-local.96.ll
M llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll
M llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll
M llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
M llvm/test/CodeGen/AMDGPU/max.i16.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll
M llvm/test/CodeGen/AMDGPU/memory_clause.ll
M llvm/test/CodeGen/AMDGPU/min.ll
M llvm/test/CodeGen/AMDGPU/missing-store.ll
M llvm/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll
M llvm/test/CodeGen/AMDGPU/mul_int24.ll
M llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
M llvm/test/CodeGen/AMDGPU/operand-spacing.ll
M llvm/test/CodeGen/AMDGPU/packed-op-sel.ll
M llvm/test/CodeGen/AMDGPU/pr51516.mir
M llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
M llvm/test/CodeGen/AMDGPU/saddo.ll
M llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
M llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
M llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
A llvm/test/CodeGen/AMDGPU/schedule-ilp.mir
M llvm/test/CodeGen/AMDGPU/sdiv.ll
M llvm/test/CodeGen/AMDGPU/sdiv64.ll
M llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
M llvm/test/CodeGen/AMDGPU/select.f16.ll
M llvm/test/CodeGen/AMDGPU/select64.ll
M llvm/test/CodeGen/AMDGPU/sext-divergence-driven-isel.ll
M llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
M llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
M llvm/test/CodeGen/AMDGPU/shift-i128.ll
M llvm/test/CodeGen/AMDGPU/shl.ll
M llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
M llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
M llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
M llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll
M llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
M llvm/test/CodeGen/AMDGPU/sign_extend.ll
M llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
M llvm/test/CodeGen/AMDGPU/sra.ll
M llvm/test/CodeGen/AMDGPU/srem64.ll
M llvm/test/CodeGen/AMDGPU/srl.ll
M llvm/test/CodeGen/AMDGPU/store-local.128.ll
M llvm/test/CodeGen/AMDGPU/store-local.96.ll
M llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll
M llvm/test/CodeGen/AMDGPU/sub.ll
M llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
M llvm/test/CodeGen/AMDGPU/udiv.ll
M llvm/test/CodeGen/AMDGPU/udiv64.ll
M llvm/test/CodeGen/AMDGPU/udivrem.ll
M llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
M llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
M llvm/test/CodeGen/AMDGPU/urem64.ll
M llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
M llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
M llvm/test/CodeGen/AMDGPU/vector-extract-insert.ll
M llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll
M llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll
M llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
Log Message:
-----------
[AMDGPU] Set most sched model resource's BufferSize to one
Using a BufferSize of one for memory ProcResources will result in better
ILP since it more accurately models the dependencies between memory ops
and their consumers on an in-order processor. After this change, the
scheduler will treat the data edges from loads as blocking so that
stalls are guaranteed when waiting for data to be retreaved from memory.
Since we don't actually track waitcnt here, this should do a better job
at modeling their behavior.
Practically, this means that the scheduler will trigger the 'STALL'
heuristic more often.
This type of change needs to be evaluated experimentally. Preliminary
results are positive.
Fixes: SWDEV-282962
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D114777
More information about the All-commits
mailing list