[all-commits] [llvm/llvm-project] ba52f0: [AMDGPU] CodeGen for GFX12 S_WAIT_* instructions (...

Jay Foad via All-commits all-commits at lists.llvm.org
Thu Jan 18 02:47:57 PST 2024


  Branch: refs/heads/main
  Home:   https://github.com/llvm/llvm-project
  Commit: ba52f06f9d92c7ca04b440f618f8d352ea121fcc
      https://github.com/llvm/llvm-project/commit/ba52f06f9d92c7ca04b440f618f8d352ea121fcc
  Author: Jay Foad <jay.foad at amd.com>
  Date:   2024-01-18 (Thu, 18 Jan 2024)

  Changed paths:
    M llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
    M llvm/lib/Target/AMDGPU/GCNSubtarget.h
    M llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
    M llvm/lib/Target/AMDGPU/SIInstrInfo.h
    M llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
    M llvm/lib/Target/AMDGPU/SOPInstructions.td
    M llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
    M llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
    M llvm/test/CodeGen/AMDGPU/GlobalISel/addsubu64.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.a16.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.a16.dim.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.dim.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.a16.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
    M llvm/test/CodeGen/AMDGPU/add.ll
    M llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
    M llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
    M llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll
    M llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll
    M llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
    M llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
    M llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll
    M llvm/test/CodeGen/AMDGPU/clamp.ll
    M llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll
    M llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll
    M llvm/test/CodeGen/AMDGPU/flat-scratch-i8-i16.ll
    M llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
    M llvm/test/CodeGen/AMDGPU/flat-scratch.ll
    M llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
    M llvm/test/CodeGen/AMDGPU/fmaximum.ll
    M llvm/test/CodeGen/AMDGPU/fminimum.ll
    M llvm/test/CodeGen/AMDGPU/fp-min-max-num-flat-atomics.ll
    M llvm/test/CodeGen/AMDGPU/fp-min-max-num-global-atomics.ll
    M llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
    M llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
    M llvm/test/CodeGen/AMDGPU/global-saddr-store.ll
    M llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.cond.sub.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.ordered.add.b64.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.dim.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.flt.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane16.var.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll
    M llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
    M llvm/test/CodeGen/AMDGPU/load-constant-f32.ll
    M llvm/test/CodeGen/AMDGPU/load-constant-f64.ll
    M llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
    M llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
    M llvm/test/CodeGen/AMDGPU/load-constant-i32.ll
    M llvm/test/CodeGen/AMDGPU/load-constant-i64.ll
    M llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
    M llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll
    M llvm/test/CodeGen/AMDGPU/mad_64_32.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll
    M llvm/test/CodeGen/AMDGPU/mul.ll
    M llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
    M llvm/test/CodeGen/AMDGPU/offset-split-flat.ll
    M llvm/test/CodeGen/AMDGPU/offset-split-global.ll
    M llvm/test/CodeGen/AMDGPU/readcyclecounter.ll
    M llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll
    M llvm/test/CodeGen/AMDGPU/sub.ll
    M llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.ll
    M llvm/test/CodeGen/AMDGPU/waitcnt-global-inv-wb.mir

  Log Message:
  -----------
  [AMDGPU] CodeGen for GFX12 S_WAIT_* instructions (#77438)

Update SIMemoryLegalizer and SIInsertWaitcnts to use separate wait
instructions per counter (e.g. S_WAIT_LOADCNT) and split VMCNT into
separate LOADCNT, SAMPLECNT and BVHCNT counters.




More information about the All-commits mailing list