[all-commits] [llvm/llvm-project] 75b0e1: [AMDGPU] Enable multi-group xnack replay in hardwa...

Christudasan Devadasan via All-commits all-commits at lists.llvm.org
Fri Nov 21 00:11:25 PST 2025


  Branch: refs/heads/users/cdevadas/enable-multi-group-xnack-replay
  Home:   https://github.com/llvm/llvm-project
  Commit: 75b0e1272da74f528142f46e69c70541c6d66d29
      https://github.com/llvm/llvm-project/commit/75b0e1272da74f528142f46e69c70541c6d66d29
  Author: Christudasan Devadasan <Christudasan.Devadasan at amd.com>
  Date:   2025-11-21 (Fri, 21 Nov 2025)

  Changed paths:
    M llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
    M llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/minmaxabs-i64.ll
    M llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
    M llvm/test/CodeGen/AMDGPU/add-max.ll
    M llvm/test/CodeGen/AMDGPU/add_u64.ll
    M llvm/test/CodeGen/AMDGPU/addrspacecast-gas.ll
    M llvm/test/CodeGen/AMDGPU/bf16-conversions.ll
    M llvm/test/CodeGen/AMDGPU/bf16-math.ll
    M llvm/test/CodeGen/AMDGPU/bf16.ll
    M llvm/test/CodeGen/AMDGPU/bitop3.ll
    M llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx1250.ll
    M llvm/test/CodeGen/AMDGPU/calling-conventions.ll
    M llvm/test/CodeGen/AMDGPU/carryout-selection.ll
    M llvm/test/CodeGen/AMDGPU/code-size-estimate.ll
    M llvm/test/CodeGen/AMDGPU/ds_read2-gfx1250.ll
    M llvm/test/CodeGen/AMDGPU/ds_write2.ll
    M llvm/test/CodeGen/AMDGPU/fcanonicalize.bf16.ll
    M llvm/test/CodeGen/AMDGPU/flat-load-saddr-to-vaddr.ll
    M llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll
    M llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll
    M llvm/test/CodeGen/AMDGPU/flat-saddr-store.ll
    M llvm/test/CodeGen/AMDGPU/fmax3.ll
    M llvm/test/CodeGen/AMDGPU/fmin3.ll
    M llvm/test/CodeGen/AMDGPU/fp-atomics-gfx942.ll
    M llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
    M llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
    M llvm/test/CodeGen/AMDGPU/global-address.ll
    M llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll
    M llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll
    M llvm/test/CodeGen/AMDGPU/literal64.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.id.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.load.async.to.lds.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.load.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.e5m3.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.f16.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.sat.pk.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scale.pk.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk16.gfx1250.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk8.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.sr.pk.gfx1250.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.sr.pk16.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.sr.pk.bf16.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.flat.prefetch.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.async.to.lds.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.prefetch.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.async.from.lds.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.monitor.gfx1250.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.tr.gfx1250.w32.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.gfx1250.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.atomic.buffer.load.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.tfe.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.atomic.buffer.load.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.bf16.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.bf16.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx1250.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.rtn.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.atomic.buffer.load.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.tfe.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.atomic.buffer.load.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tensor.load.store.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma.imm.gfx1250.w32.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll
    M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma.index.gfx1250.w32.ll
    M llvm/test/CodeGen/AMDGPU/llvm.cos.bf16.ll
    M llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll
    M llvm/test/CodeGen/AMDGPU/llvm.sin.bf16.ll
    M llvm/test/CodeGen/AMDGPU/llvm.sqrt.bf16.ll
    M llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
    M llvm/test/CodeGen/AMDGPU/load-constant-i32.ll
    M llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll
    M llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
    M llvm/test/CodeGen/AMDGPU/mad-mix-bf16.ll
    M llvm/test/CodeGen/AMDGPU/mad_64_32.ll
    M llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll
    M llvm/test/CodeGen/AMDGPU/max.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-barriers.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-cluster.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-cluster.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-cluster.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-agent.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-cluster.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-singlethread.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-system.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-wavefront.ll
    M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-workgroup.ll
    M llvm/test/CodeGen/AMDGPU/min.ll
    M llvm/test/CodeGen/AMDGPU/minmax.ll
    M llvm/test/CodeGen/AMDGPU/mul.ll
    M llvm/test/CodeGen/AMDGPU/packed-fp32.ll
    M llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll
    M llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
    M llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
    M llvm/test/CodeGen/AMDGPU/s-cluster-barrier.ll
    M llvm/test/CodeGen/AMDGPU/scale-offset-flat.ll
    M llvm/test/CodeGen/AMDGPU/scale-offset-global.ll
    M llvm/test/CodeGen/AMDGPU/scale-offset-scratch.ll
    M llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll
    M llvm/test/CodeGen/AMDGPU/sub_u64.ll
    M llvm/test/CodeGen/AMDGPU/v_ashr_pk.ll
    M llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll
    M llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
    M llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll

  Log Message:
  -----------
  [AMDGPU] Enable multi-group xnack replay in hardware (GFX1250)

This patch enables the multi-group xnack replay mode by
configuring the hardware MODE register at kernel entry.
This aligns the hardware behavior with the compiler's
existing multi-group s_wait_xcnt insertion logic.



To unsubscribe from these emails, change your notification settings at https://github.com/llvm/llvm-project/settings/notifications


More information about the All-commits mailing list