[all-commits] [llvm/llvm-project] a3a3e6: [AMDGPU] Rewrite GFX12 SGPR hazard handling to ded...

Carl Ritson via All-commits all-commits at lists.llvm.org
Wed Jan 29 18:21:33 PST 2025


  Branch: refs/heads/main
  Home:   https://github.com/llvm/llvm-project
  Commit: a3a3e6997bd7bd82cce6737c1315019704a846ed
      https://github.com/llvm/llvm-project/commit/a3a3e6997bd7bd82cce6737c1315019704a846ed
  Author: Carl Ritson <carl.ritson at amd.com>
  Date:   2025-01-30 (Thu, 30 Jan 2025)

  Changed paths:
    M llvm/docs/AMDGPUUsage.rst
    M llvm/lib/Target/AMDGPU/AMDGPU.h
    M llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
    A llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
    A llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.h
    M llvm/lib/Target/AMDGPU/CMakeLists.txt
    M llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
    M llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
    M llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
    M llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
    M llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
    M llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
    M llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
    M llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll
    M llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll
    M llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
    M llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll
    M llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll
    M llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll
    M llvm/test/CodeGen/AMDGPU/fcmp.f16.ll
    M llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll
    M llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll
    M llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll
    M llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll
    M llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
    M llvm/test/CodeGen/AMDGPU/fma.f16.ll
    M llvm/test/CodeGen/AMDGPU/fmaximum3.ll
    M llvm/test/CodeGen/AMDGPU/fminimum3.ll
    M llvm/test/CodeGen/AMDGPU/fract-match.ll
    M llvm/test/CodeGen/AMDGPU/global-atomicrmw-fadd.ll
    M llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmax.ll
    M llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmin.ll
    M llvm/test/CodeGen/AMDGPU/global-atomicrmw-fsub.ll
    M llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll
    M llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
    M llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
    M llvm/test/CodeGen/AMDGPU/insert_waitcnt_for_precise_memory.ll
    M llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
    M llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.cond.sub.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.whole.wave-w32.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.whole.wave-w64.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ptr.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2bf16.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2bf16.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_nortn.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_rtn.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f32.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32.ll
    M llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
    M llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
    M llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
    M llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
    M llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll
    M llvm/test/CodeGen/AMDGPU/local-atomicrmw-fmax.ll
    M llvm/test/CodeGen/AMDGPU/local-atomicrmw-fmin.ll
    M llvm/test/CodeGen/AMDGPU/local-atomicrmw-fsub.ll
    M llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll
    M llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-nontemporal-metadata.ll
    M llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
    M llvm/test/CodeGen/AMDGPU/mad_64_32.ll
    M llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll
    M llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
    M llvm/test/CodeGen/AMDGPU/maximumnum.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-store-infinite-loop.ll
    M llvm/test/CodeGen/AMDGPU/minimumnum.ll
    M llvm/test/CodeGen/AMDGPU/offset-split-flat.ll
    M llvm/test/CodeGen/AMDGPU/offset-split-global.ll
    M llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll
    M llvm/test/CodeGen/AMDGPU/s-barrier.ll
    M llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll
    M llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll
    M llvm/test/CodeGen/AMDGPU/sub.ll
    M llvm/test/CodeGen/AMDGPU/v_swap_b16.ll
    M llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir
    A llvm/test/CodeGen/AMDGPU/valu-read-sgpr-hazard-attrs.mir
    M llvm/test/CodeGen/AMDGPU/valu-read-sgpr-hazard.mir
    M llvm/test/CodeGen/AMDGPU/vcmpx-permlane-hazard.mir

  Log Message:
  -----------
  [AMDGPU] Rewrite GFX12 SGPR hazard handling to dedicated pass (#118750)

- Algorithm operates over whole IR to attempt to minimize waits.
- Add support for VALU->VALU SGPR hazards via VA_SDST/VA_VCC.



To unsubscribe from these emails, change your notification settings at https://github.com/llvm/llvm-project/settings/notifications


More information about the All-commits mailing list