[all-commits] [llvm/llvm-project] f2c164: [AMDGPU] Do not wait for vscnt on function entry a...

Jay Foad via All-commits all-commits at lists.llvm.org
Tue Jul 4 04:26:21 PDT 2023


  Branch: refs/heads/main
  Home:   https://github.com/llvm/llvm-project
  Commit: f2c164c8150548d983565c4ddc0fde790f9e2a5b
      https://github.com/llvm/llvm-project/commit/f2c164c8150548d983565c4ddc0fde790f9e2a5b
  Author: Jay Foad <jay.foad at amd.com>
  Date:   2023-07-04 (Tue, 04 Jul 2023)

  Changed paths:
    M llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
    M llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/br-constant-invalid-sgpr-copy.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot4.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot8.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/mul.v2i16.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/sbfx.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/v_bfe_i32.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll
    M llvm/test/CodeGen/AMDGPU/amd.endpgm.ll
    M llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
    M llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
    M llvm/test/CodeGen/AMDGPU/bf16.ll
    M llvm/test/CodeGen/AMDGPU/bfi_int.ll
    M llvm/test/CodeGen/AMDGPU/bitreverse.ll
    M llvm/test/CodeGen/AMDGPU/bswap.ll
    M llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll
    M llvm/test/CodeGen/AMDGPU/call-argument-types.ll
    M llvm/test/CodeGen/AMDGPU/calling-conventions.ll
    M llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
    M llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
    M llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
    M llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
    M llvm/test/CodeGen/AMDGPU/cse-convergent.ll
    M llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
    M llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
    M llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll
    M llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
    M llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
    M llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll
    M llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll
    M llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
    M llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
    M llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
    M llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll
    M llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll
    M llvm/test/CodeGen/AMDGPU/flat-scratch.ll
    M llvm/test/CodeGen/AMDGPU/fma.f16.ll
    M llvm/test/CodeGen/AMDGPU/fmad-formation-fmul-distribute-denormal-mode.ll
    M llvm/test/CodeGen/AMDGPU/fmax3.ll
    M llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
    M llvm/test/CodeGen/AMDGPU/fmin3.ll
    M llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
    M llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
    M llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll
    M llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
    M llvm/test/CodeGen/AMDGPU/fneg.ll
    M llvm/test/CodeGen/AMDGPU/fold-fabs.ll
    M llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll
    M llvm/test/CodeGen/AMDGPU/fpext-free.ll
    M llvm/test/CodeGen/AMDGPU/fpow.ll
    M llvm/test/CodeGen/AMDGPU/fract-match.ll
    M llvm/test/CodeGen/AMDGPU/fshr.ll
    M llvm/test/CodeGen/AMDGPU/function-args.ll
    M llvm/test/CodeGen/AMDGPU/function-returns.ll
    M llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
    M llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll
    M llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
    M llvm/test/CodeGen/AMDGPU/imm16.ll
    M llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
    M llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
    M llvm/test/CodeGen/AMDGPU/known-never-nan.ll
    M llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot4.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll
    M llvm/test/CodeGen/AMDGPU/llvm.frexp.ll
    M llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
    M llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll
    M llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
    M llvm/test/CodeGen/AMDGPU/llvm.log.ll
    M llvm/test/CodeGen/AMDGPU/llvm.log10.ll
    M llvm/test/CodeGen/AMDGPU/llvm.log2.ll
    M llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
    M llvm/test/CodeGen/AMDGPU/llvm.powi.ll
    M llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir
    M llvm/test/CodeGen/AMDGPU/load-local.128.ll
    M llvm/test/CodeGen/AMDGPU/load-local.96.ll
    M llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
    M llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
    M llvm/test/CodeGen/AMDGPU/mad-mix.ll
    M llvm/test/CodeGen/AMDGPU/mad.u16.ll
    M llvm/test/CodeGen/AMDGPU/mad_64_32.ll
    M llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll
    M llvm/test/CodeGen/AMDGPU/memory_clause.ll
    M llvm/test/CodeGen/AMDGPU/minmax.ll
    M llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll
    M llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
    M llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
    M llvm/test/CodeGen/AMDGPU/offset-split-flat.ll
    M llvm/test/CodeGen/AMDGPU/offset-split-global.ll
    M llvm/test/CodeGen/AMDGPU/permute_i8.ll
    M llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
    M llvm/test/CodeGen/AMDGPU/ptrmask.ll
    M llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
    M llvm/test/CodeGen/AMDGPU/roundeven.ll
    M llvm/test/CodeGen/AMDGPU/saddsat.ll
    M llvm/test/CodeGen/AMDGPU/select-constant-xor.ll
    M llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll
    M llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
    M llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll
    M llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
    M llvm/test/CodeGen/AMDGPU/ssubsat.ll
    M llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll
    M llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
    M llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
    M llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll
    M llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
    M llvm/test/CodeGen/AMDGPU/strict_fma.f32.ll
    M llvm/test/CodeGen/AMDGPU/strict_fma.f64.ll
    M llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
    M llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll
    M llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll
    M llvm/test/CodeGen/AMDGPU/strict_fpext.ll
    M llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
    M llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
    M llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll
    M llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll
    M llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll
    M llvm/test/CodeGen/AMDGPU/strict_ldexp.f32.ll
    M llvm/test/CodeGen/AMDGPU/strict_ldexp.f64.ll
    M llvm/test/CodeGen/AMDGPU/uaddsat.ll
    M llvm/test/CodeGen/AMDGPU/udiv.ll
    M llvm/test/CodeGen/AMDGPU/usubsat.ll
    M llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
    M llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll
    M llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
    M llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
    M llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir
    M llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir
    M llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir
    M llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll
    M llvm/test/CodeGen/AMDGPU/wave32.ll
    M llvm/test/CodeGen/AMDGPU/wqm.ll

  Log Message:
  -----------
  [AMDGPU] Do not wait for vscnt on function entry and return

SIInsertWaitcnts inserts waitcnt instructions to resolve data
dependencies. The GFX10+ vscnt (VMEM store count) counter is never used
in this way. It is only used to resolve memory dependencies, and that is
handled by SIMemoryLegalizer. Hence there is no need to conservatively
wait for vscnt to be 0 on function entry and before returns.

Differential Revision: https://reviews.llvm.org/D153537




More information about the All-commits mailing list