[PATCH] D153537: [AMDGPU] Do not wait for vscnt on function entry and return

Jay Foad via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 22 05:31:19 PDT 2023


foad created this revision.
foad added reviewers: AMDGPU, nhaehnle, mareko, rampitec, kerbowa, stepthomas.
Herald added subscribers: bzcheeseman, StephenFan, wenlei, hiraditya, tpr, dstuttard, yaxunl, jvesely, kzhuravl, arsenm.
Herald added a project: All.
foad requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.

SIInsertWaitcnts inserts waitcnt instructions to resolve data
dependencies. The GFX10+ vscnt (VMEM store count) counter is never used
to resolve data dependencies. It is only used to resolve memory
dependencies, and those are handled by SIMemoryLegalizer. Hence there is
no need for SIInsertWaitcnts to conservatively wait for vscnt to be 0 on
function entry and before returns.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D153537

Files:
  llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
  llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/br-constant-invalid-sgpr-copy.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot4.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot8.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/mul.v2i16.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/sbfx.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/v_bfe_i32.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll
  llvm/test/CodeGen/AMDGPU/amd.endpgm.ll
  llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
  llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
  llvm/test/CodeGen/AMDGPU/bf16.ll
  llvm/test/CodeGen/AMDGPU/bfi_int.ll
  llvm/test/CodeGen/AMDGPU/bitreverse.ll
  llvm/test/CodeGen/AMDGPU/bswap.ll
  llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll
  llvm/test/CodeGen/AMDGPU/call-argument-types.ll
  llvm/test/CodeGen/AMDGPU/calling-conventions.ll
  llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
  llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
  llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
  llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
  llvm/test/CodeGen/AMDGPU/cse-convergent.ll
  llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
  llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
  llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll
  llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
  llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
  llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll
  llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll
  llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
  llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
  llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
  llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll
  llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll
  llvm/test/CodeGen/AMDGPU/fexp.ll
  llvm/test/CodeGen/AMDGPU/flat-scratch.ll
  llvm/test/CodeGen/AMDGPU/fma.f16.ll
  llvm/test/CodeGen/AMDGPU/fmad-formation-fmul-distribute-denormal-mode.ll
  llvm/test/CodeGen/AMDGPU/fmax3.ll
  llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
  llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
  llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
  llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll
  llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
  llvm/test/CodeGen/AMDGPU/fneg.ll
  llvm/test/CodeGen/AMDGPU/fold-fabs.ll
  llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll
  llvm/test/CodeGen/AMDGPU/fpext-free.ll
  llvm/test/CodeGen/AMDGPU/fpow.ll
  llvm/test/CodeGen/AMDGPU/fract-match.ll
  llvm/test/CodeGen/AMDGPU/fshr.ll
  llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
  llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll
  llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
  llvm/test/CodeGen/AMDGPU/imm16.ll
  llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
  llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
  llvm/test/CodeGen/AMDGPU/known-never-nan.ll
  llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot4.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll
  llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
  llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll
  llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
  llvm/test/CodeGen/AMDGPU/llvm.log.ll
  llvm/test/CodeGen/AMDGPU/llvm.log10.ll
  llvm/test/CodeGen/AMDGPU/llvm.log2.ll
  llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
  llvm/test/CodeGen/AMDGPU/llvm.powi.ll
  llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir
  llvm/test/CodeGen/AMDGPU/load-local.128.ll
  llvm/test/CodeGen/AMDGPU/load-local.96.ll
  llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
  llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
  llvm/test/CodeGen/AMDGPU/mad-mix.ll
  llvm/test/CodeGen/AMDGPU/mad.u16.ll
  llvm/test/CodeGen/AMDGPU/mad_64_32.ll
  llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll
  llvm/test/CodeGen/AMDGPU/memory_clause.ll
  llvm/test/CodeGen/AMDGPU/minmax.ll
  llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll
  llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
  llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
  llvm/test/CodeGen/AMDGPU/offset-split-flat.ll
  llvm/test/CodeGen/AMDGPU/offset-split-global.ll
  llvm/test/CodeGen/AMDGPU/permute_i8.ll
  llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
  llvm/test/CodeGen/AMDGPU/ptrmask.ll
  llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
  llvm/test/CodeGen/AMDGPU/roundeven.ll
  llvm/test/CodeGen/AMDGPU/saddsat.ll
  llvm/test/CodeGen/AMDGPU/select-constant-xor.ll
  llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll
  llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
  llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll
  llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
  llvm/test/CodeGen/AMDGPU/ssubsat.ll
  llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll
  llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
  llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
  llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll
  llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
  llvm/test/CodeGen/AMDGPU/strict_fma.f32.ll
  llvm/test/CodeGen/AMDGPU/strict_fma.f64.ll
  llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
  llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll
  llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll
  llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
  llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll
  llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll
  llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll
  llvm/test/CodeGen/AMDGPU/strict_ldexp.f32.ll
  llvm/test/CodeGen/AMDGPU/strict_ldexp.f64.ll
  llvm/test/CodeGen/AMDGPU/uaddsat.ll
  llvm/test/CodeGen/AMDGPU/udiv.ll
  llvm/test/CodeGen/AMDGPU/usubsat.ll
  llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
  llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll
  llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
  llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
  llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir
  llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir
  llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir
  llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll
  llvm/test/CodeGen/AMDGPU/wave32.ll
  llvm/test/CodeGen/AMDGPU/wqm.ll



More information about the llvm-commits mailing list