[PATCH] D113017: [AMDGPU] Avoid copying dead subregisters in copyPhysReg
Jay Foad via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 2 06:56:57 PDT 2021
foad created this revision.
Herald added subscribers: wenlei, kerbowa, arphaman, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl, arsenm, qcolombet, MatzeB.
foad requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.
When SIInstrInfo::copyPhysReg splits a multi-dword (superregister) copy
into individual dword (subregister) copies, use LivePhysRegs info to
avoid copying dead subregisters.
This fixes a liveness problem where the superregister copy source may
have been only partially defined (which is allowed) but one of the
resulting subregister copy sources would be completely undefined (which
is not allowed by the machine verifier).
This replaces the previous workaround, which was to add implicit
superregister use/def operands to all the subregister copy instructions,
which caused false dependencies between them and restricted the freedom
of post-RA scheduling and other late codegen passes.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D113017
Files:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll
llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
llvm/test/CodeGen/AMDGPU/bitreverse.ll
llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
llvm/test/CodeGen/AMDGPU/cluster_stores.ll
llvm/test/CodeGen/AMDGPU/copy-overlap-vgpr-kill.mir
llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir
llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll
llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
llvm/test/CodeGen/AMDGPU/ds_write2.ll
llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll
llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll
llvm/test/CodeGen/AMDGPU/flat-scratch.ll
llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics-gfx10.ll
llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
llvm/test/CodeGen/AMDGPU/frem.ll
llvm/test/CodeGen/AMDGPU/fshl.ll
llvm/test/CodeGen/AMDGPU/fshr.ll
llvm/test/CodeGen/AMDGPU/half.ll
llvm/test/CodeGen/AMDGPU/idot4s.ll
llvm/test/CodeGen/AMDGPU/idot4u.ll
llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
llvm/test/CodeGen/AMDGPU/kernel-args.ll
llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
llvm/test/CodeGen/AMDGPU/load-global-i16.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll
llvm/test/CodeGen/AMDGPU/memory_clause.ll
llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll
llvm/test/CodeGen/AMDGPU/saddo.ll
llvm/test/CodeGen/AMDGPU/sdiv64.ll
llvm/test/CodeGen/AMDGPU/select64.ll
llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir
llvm/test/CodeGen/AMDGPU/shift-i128.ll
llvm/test/CodeGen/AMDGPU/shl.ll
llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
llvm/test/CodeGen/AMDGPU/srem64.ll
llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
llvm/test/CodeGen/AMDGPU/store-local.128.ll
llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll
llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
llvm/test/CodeGen/AMDGPU/trap-abis.ll
llvm/test/CodeGen/AMDGPU/udiv64.ll
llvm/test/CodeGen/AMDGPU/udivrem.ll
llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
llvm/test/CodeGen/AMDGPU/urem64.ll
llvm/test/CodeGen/AMDGPU/v_mov_b64_expand_and_shrink.mir
llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir
llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll
llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
More information about the llvm-commits
mailing list