[PATCH] D113017: [AMDGPU] Avoid copying dead subregisters in copyPhysReg

Jay Foad via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 2 06:56:57 PDT 2021


foad created this revision.
Herald added subscribers: wenlei, kerbowa, arphaman, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl, arsenm, qcolombet, MatzeB.
foad requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.

When SIInstrInfo::copyPhysReg splits a multi-dword (superregister) copy
into individual dword (subregister) copies, use LivePhysRegs info to
avoid copying dead subregisters.

This fixes a liveness problem where the superregister copy source may
have been only partially defined (which is allowed) but one of the
resulting subregister copy sources would be completely undefined (which
is not allowed by the machine verifier).

This replaces the previous workaround of adding implicit superregister
use/def operands to all the subregister copy instructions. Those extra
operands caused false dependencies between the copies and restricted the
freedom of post-RA scheduling and other late codegen passes.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D113017

Files:
  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
  llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
  llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll
  llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
  llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
  llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
  llvm/test/CodeGen/AMDGPU/bitreverse.ll
  llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
  llvm/test/CodeGen/AMDGPU/cluster_stores.ll
  llvm/test/CodeGen/AMDGPU/copy-overlap-vgpr-kill.mir
  llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir
  llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
  llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
  llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll
  llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
  llvm/test/CodeGen/AMDGPU/ds_write2.ll
  llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll
  llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll
  llvm/test/CodeGen/AMDGPU/flat-scratch.ll
  llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics-gfx10.ll
  llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
  llvm/test/CodeGen/AMDGPU/frem.ll
  llvm/test/CodeGen/AMDGPU/fshl.ll
  llvm/test/CodeGen/AMDGPU/fshr.ll
  llvm/test/CodeGen/AMDGPU/half.ll
  llvm/test/CodeGen/AMDGPU/idot4s.ll
  llvm/test/CodeGen/AMDGPU/idot4u.ll
  llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
  llvm/test/CodeGen/AMDGPU/kernel-args.ll
  llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
  llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
  llvm/test/CodeGen/AMDGPU/load-global-i16.ll
  llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
  llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
  llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
  llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
  llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
  llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
  llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
  llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
  llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
  llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
  llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
  llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll
  llvm/test/CodeGen/AMDGPU/memory_clause.ll
  llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll
  llvm/test/CodeGen/AMDGPU/saddo.ll
  llvm/test/CodeGen/AMDGPU/sdiv64.ll
  llvm/test/CodeGen/AMDGPU/select64.ll
  llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir
  llvm/test/CodeGen/AMDGPU/shift-i128.ll
  llvm/test/CodeGen/AMDGPU/shl.ll
  llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
  llvm/test/CodeGen/AMDGPU/srem64.ll
  llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
  llvm/test/CodeGen/AMDGPU/store-local.128.ll
  llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll
  llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
  llvm/test/CodeGen/AMDGPU/trap-abis.ll
  llvm/test/CodeGen/AMDGPU/udiv64.ll
  llvm/test/CodeGen/AMDGPU/udivrem.ll
  llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
  llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
  llvm/test/CodeGen/AMDGPU/urem64.ll
  llvm/test/CodeGen/AMDGPU/v_mov_b64_expand_and_shrink.mir
  llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir
  llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll
  llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll



More information about the llvm-commits mailing list