[all-commits] [llvm/llvm-project] 75b0e1: [AMDGPU] Enable multi-group xnack replay in hardwa...
Christudasan Devadasan via All-commits
all-commits at lists.llvm.org
Fri Nov 21 00:11:25 PST 2025
Branch: refs/heads/users/cdevadas/enable-multi-group-xnack-replay
Home: https://github.com/llvm/llvm-project
Commit: 75b0e1272da74f528142f46e69c70541c6d66d29
https://github.com/llvm/llvm-project/commit/75b0e1272da74f528142f46e69c70541c6d66d29
Author: Christudasan Devadasan <Christudasan.Devadasan at amd.com>
Date: 2025-11-21 (Fri, 21 Nov 2025)
Changed paths:
M llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
M llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/minmaxabs-i64.ll
M llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
M llvm/test/CodeGen/AMDGPU/add-max.ll
M llvm/test/CodeGen/AMDGPU/add_u64.ll
M llvm/test/CodeGen/AMDGPU/addrspacecast-gas.ll
M llvm/test/CodeGen/AMDGPU/bf16-conversions.ll
M llvm/test/CodeGen/AMDGPU/bf16-math.ll
M llvm/test/CodeGen/AMDGPU/bf16.ll
M llvm/test/CodeGen/AMDGPU/bitop3.ll
M llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx1250.ll
M llvm/test/CodeGen/AMDGPU/calling-conventions.ll
M llvm/test/CodeGen/AMDGPU/carryout-selection.ll
M llvm/test/CodeGen/AMDGPU/code-size-estimate.ll
M llvm/test/CodeGen/AMDGPU/ds_read2-gfx1250.ll
M llvm/test/CodeGen/AMDGPU/ds_write2.ll
M llvm/test/CodeGen/AMDGPU/fcanonicalize.bf16.ll
M llvm/test/CodeGen/AMDGPU/flat-load-saddr-to-vaddr.ll
M llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll
M llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll
M llvm/test/CodeGen/AMDGPU/flat-saddr-store.ll
M llvm/test/CodeGen/AMDGPU/fmax3.ll
M llvm/test/CodeGen/AMDGPU/fmin3.ll
M llvm/test/CodeGen/AMDGPU/fp-atomics-gfx942.ll
M llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
M llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
M llvm/test/CodeGen/AMDGPU/global-address.ll
M llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll
M llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll
M llvm/test/CodeGen/AMDGPU/literal64.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.id.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.load.async.to.lds.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.load.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.e5m3.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.f16.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.sat.pk.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scale.pk.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk16.gfx1250.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk8.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.sr.pk.gfx1250.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.sr.pk16.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.sr.pk.bf16.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.flat.prefetch.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.async.to.lds.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.prefetch.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.async.from.lds.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.monitor.gfx1250.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.tr.gfx1250.w32.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.gfx1250.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.atomic.buffer.load.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.tfe.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.atomic.buffer.load.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.bf16.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.bf16.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx1250.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.rtn.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.atomic.buffer.load.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.tfe.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.atomic.buffer.load.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tensor.load.store.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma.gfx1250.w32.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma.imm.gfx1250.w32.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll
M llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma.index.gfx1250.w32.ll
M llvm/test/CodeGen/AMDGPU/llvm.cos.bf16.ll
M llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll
M llvm/test/CodeGen/AMDGPU/llvm.sin.bf16.ll
M llvm/test/CodeGen/AMDGPU/llvm.sqrt.bf16.ll
M llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
M llvm/test/CodeGen/AMDGPU/load-constant-i32.ll
M llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll
M llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll
M llvm/test/CodeGen/AMDGPU/mad-mix-bf16.ll
M llvm/test/CodeGen/AMDGPU/mad_64_32.ll
M llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll
M llvm/test/CodeGen/AMDGPU/max.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-barriers.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-cluster.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-cluster.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-cluster.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-agent.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-cluster.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-singlethread.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-system.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-wavefront.ll
M llvm/test/CodeGen/AMDGPU/memory-legalizer-private-workgroup.ll
M llvm/test/CodeGen/AMDGPU/min.ll
M llvm/test/CodeGen/AMDGPU/minmax.ll
M llvm/test/CodeGen/AMDGPU/mul.ll
M llvm/test/CodeGen/AMDGPU/packed-fp32.ll
M llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll
M llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
M llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
M llvm/test/CodeGen/AMDGPU/s-cluster-barrier.ll
M llvm/test/CodeGen/AMDGPU/scale-offset-flat.ll
M llvm/test/CodeGen/AMDGPU/scale-offset-global.ll
M llvm/test/CodeGen/AMDGPU/scale-offset-scratch.ll
M llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll
M llvm/test/CodeGen/AMDGPU/sub_u64.ll
M llvm/test/CodeGen/AMDGPU/v_ashr_pk.ll
M llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll
M llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
M llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
Log Message:
-----------
[AMDGPU] Enable multi-group xnack replay in hardware (GFX1250)
This patch enables the multi-group xnack replay mode by
configuring the hardware MODE register at kernel entry.
This aligns the hardware behavior with the compiler's
existing multi-group s_wait_xcnt insertion logic.
To unsubscribe from these emails, change your notification settings at https://github.com/llvm/llvm-project/settings/notifications
More information about the All-commits
mailing list