[llvm] 1c49ce6 - [AMDGPU] Enable FWD_PROGRESS bit for GFX10+ on PAL (#139895)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 21 09:29:09 PDT 2025
Author: Jay Foad
Date: 2025-07-21T17:29:06+01:00
New Revision: 1c49ce676caa161250624714c3698b87dc2f8628
URL: https://github.com/llvm/llvm-project/commit/1c49ce676caa161250624714c3698b87dc2f8628
DIFF: https://github.com/llvm/llvm-project/commit/1c49ce676caa161250624714c3698b87dc2f8628.diff
LOG: [AMDGPU] Enable FWD_PROGRESS bit for GFX10+ on PAL (#139895)
Performance testing shows no significant gains or losses on graphics
workloads, so this is mostly to make the behavior consistent across all
supported OSes instead of special-casing HSA.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll
llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable-dvgpr.ll
llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-dvgpr.ll
llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
llvm/test/CodeGen/AMDGPU/pal-metadata-3.6.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 749b9efc81378..4b3dc371c65f0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1415,6 +1415,7 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
MD->setHwStage(CC, ".wgp_mode", (bool)CurrentProgramInfo.WgpMode);
MD->setHwStage(CC, ".mem_ordered", (bool)CurrentProgramInfo.MemOrdered);
+ MD->setHwStage(CC, ".forward_progress", (bool)CurrentProgramInfo.FwdProgress);
if (AMDGPU::isCompute(CC)) {
MD->setHwStage(CC, ".trap_present",
diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
index 7093fe6405abb..5940f45e74bf2 100644
--- a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
@@ -85,7 +85,8 @@ static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo,
S_00B848_PRIV(ProgInfo.Priv) |
S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
S_00B848_WGP_MODE(ProgInfo.WgpMode) |
- S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);
+ S_00B848_MEM_ORDERED(ProgInfo.MemOrdered) |
+ S_00B848_FWD_PROGRESS(ProgInfo.FwdProgress);
if (ST.hasDX10ClampMode())
Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
@@ -93,10 +94,6 @@ static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo,
if (ST.hasIEEEMode())
Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
- // TODO: in the long run we will want to enable this unconditionally.
- if (ST.getTargetTriple().getOS() == Triple::OSType::AMDHSA)
- Reg |= S_00B848_FWD_PROGRESS(ProgInfo.FwdProgress);
-
if (ST.hasRrWGMode())
Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
index 9b35920f8547a..fa4676e4befe4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -3211,7 +3211,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
; GFX10-NEXT: enable_ieee_mode = 1
; GFX10-NEXT: enable_wgp_mode = 1
; GFX10-NEXT: enable_mem_ordered = 1
-; GFX10-NEXT: enable_fwd_progress = 0
+; GFX10-NEXT: enable_fwd_progress = 1
; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
; GFX10-NEXT: user_sgpr_count = 14
; GFX10-NEXT: enable_trap_handler = 0
@@ -3303,7 +3303,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
; GFX11-NEXT: enable_ieee_mode = 1
; GFX11-NEXT: enable_wgp_mode = 1
; GFX11-NEXT: enable_mem_ordered = 1
-; GFX11-NEXT: enable_fwd_progress = 0
+; GFX11-NEXT: enable_fwd_progress = 1
; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
; GFX11-NEXT: user_sgpr_count = 13
; GFX11-NEXT: enable_trap_handler = 0
@@ -4215,7 +4215,7 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %s
; GFX10-NEXT: enable_ieee_mode = 1
; GFX10-NEXT: enable_wgp_mode = 1
; GFX10-NEXT: enable_mem_ordered = 1
-; GFX10-NEXT: enable_fwd_progress = 0
+; GFX10-NEXT: enable_fwd_progress = 1
; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
; GFX10-NEXT: user_sgpr_count = 14
; GFX10-NEXT: enable_trap_handler = 0
@@ -4300,7 +4300,7 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %s
; GFX11-NEXT: enable_ieee_mode = 1
; GFX11-NEXT: enable_wgp_mode = 1
; GFX11-NEXT: enable_mem_ordered = 1
-; GFX11-NEXT: enable_fwd_progress = 0
+; GFX11-NEXT: enable_fwd_progress = 1
; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
; GFX11-NEXT: user_sgpr_count = 13
; GFX11-NEXT: enable_trap_handler = 0
@@ -4569,7 +4569,7 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %s
; GFX10-NEXT: enable_ieee_mode = 1
; GFX10-NEXT: enable_wgp_mode = 1
; GFX10-NEXT: enable_mem_ordered = 1
-; GFX10-NEXT: enable_fwd_progress = 0
+; GFX10-NEXT: enable_fwd_progress = 1
; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
; GFX10-NEXT: user_sgpr_count = 14
; GFX10-NEXT: enable_trap_handler = 0
@@ -4657,7 +4657,7 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %s
; GFX11-NEXT: enable_ieee_mode = 1
; GFX11-NEXT: enable_wgp_mode = 1
; GFX11-NEXT: enable_mem_ordered = 1
-; GFX11-NEXT: enable_fwd_progress = 0
+; GFX11-NEXT: enable_fwd_progress = 1
; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
; GFX11-NEXT: user_sgpr_count = 13
; GFX11-NEXT: enable_trap_handler = 0
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll
index d4826a22db795..6044f6e354ee0 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll
@@ -7,7 +7,7 @@
; SI-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf0000{{$}}
; VI-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf02c0{{$}}
; GFX9-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf0000{{$}}
-; GFX12-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0x600f0000{{$}}
+; GFX12-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xe00f0000{{$}}
define amdgpu_cs half @cs_amdpal(half %arg0) #0 {
%add = fadd half %arg0, 1.0
ret half %add
diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable-dvgpr.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable-dvgpr.ll
index ae35d0dcb88f3..e6bc733775b17 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable-dvgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable-dvgpr.ll
@@ -17,6 +17,7 @@
; CHECK-NEXT: .debug_mode: 0
; CHECK-NEXT: .excp_en: 0
; CHECK-NEXT: .float_mode: 0xc0
+; CHECK-NEXT: .forward_progress: true
; CHECK-NEXT: .image_op: false
; CHECK-NEXT: .lds_size: 0x200
; CHECK-NEXT: .mem_ordered: true
diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
index 638dc8965987e..310040d44bc34 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
@@ -19,6 +19,7 @@
; CHECK-NEXT: .debug_mode: 0
; CHECK-NEXT: .excp_en: 0
; CHECK-NEXT: .float_mode: 0xc0
+; CHECK-NEXT: .forward_progress: true
; GFX11-NEXT: .ieee_mode: true
; CHECK-NEXT: .image_op: false
; CHECK-NEXT: .lds_size: 0x200
diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-dvgpr.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-dvgpr.ll
index fb6ac2e8833be..c1846c0f2c23b 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-dvgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-dvgpr.ll
@@ -59,6 +59,7 @@
; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main
; CHECK-NEXT: .excp_en: 0
; CHECK-NEXT: .float_mode: 0xc0
+; CHECK-NEXT: .forward_progress: true
; CHECK-NEXT: .image_op: false
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .mem_ordered: true
@@ -113,6 +114,7 @@
; CHECK-NEXT: .debug_mode: false
; CHECK-NEXT: .entry_point: _amdgpu_gs
; CHECK-NEXT: .entry_point_symbol: gs_shader
+; CHECK-NEXT: .forward_progress: true
; CHECK-NEXT: .lds_size: 0x200
; CHECK-NEXT: .mem_ordered: true
; CHECK-NEXT: .scratch_en: false
@@ -124,6 +126,7 @@
; CHECK-NEXT: .debug_mode: false
; CHECK-NEXT: .entry_point: _amdgpu_hs
; CHECK-NEXT: .entry_point_symbol: hs_shader
+; CHECK-NEXT: .forward_progress: true
; CHECK-NEXT: .lds_size: 0x1000
; CHECK-NEXT: .mem_ordered: true
; CHECK-NEXT: .scratch_en: false
@@ -135,6 +138,7 @@
; CHECK-NEXT: .debug_mode: false
; CHECK-NEXT: .entry_point: _amdgpu_ps
; CHECK-NEXT: .entry_point_symbol: ps_shader
+; CHECK-NEXT: .forward_progress: true
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .mem_ordered: true
; CHECK-NEXT: .scratch_en: false
diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
index 15778c8861e83..5c0c366277829 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
@@ -62,6 +62,7 @@
; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main
; CHECK-NEXT: .excp_en: 0
; CHECK-NEXT: .float_mode: 0xc0
+; CHECK-NEXT: .forward_progress: true
; GFX11-NEXT: .ieee_mode: false
; CHECK-NEXT: .image_op: false
; CHECK-NEXT: .lds_size: 0
@@ -118,6 +119,7 @@
; CHECK-NEXT: .debug_mode: false
; CHECK-NEXT: .entry_point: _amdgpu_gs_main
; CHECK-NEXT: .entry_point_symbol: gs_shader
+; CHECK-NEXT: .forward_progress: true
; GFX11-NEXT: .ieee_mode: false
; CHECK-NEXT: .lds_size: 0x200
; CHECK-NEXT: .mem_ordered: true
@@ -130,6 +132,7 @@
; CHECK-NEXT: .debug_mode: false
; CHECK-NEXT: .entry_point: _amdgpu_hs_main
; CHECK-NEXT: .entry_point_symbol: hs_shader
+; CHECK-NEXT: .forward_progress: true
; GFX11-NEXT: .ieee_mode: false
; CHECK-NEXT: .lds_size: 0x1000
; CHECK-NEXT: .mem_ordered: true
@@ -142,6 +145,7 @@
; CHECK-NEXT: .debug_mode: false
; CHECK-NEXT: .entry_point: _amdgpu_ps_main
; CHECK-NEXT: .entry_point_symbol: ps_shader
+; CHECK-NEXT: .forward_progress: true
; GFX11-NEXT: .ieee_mode: false
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .mem_ordered: true
diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.6.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.6.ll
index 644722bdd1273..830872a58f0b8 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.6.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.6.ll
@@ -62,6 +62,7 @@
; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main
; CHECK-NEXT: .excp_en: 0
; CHECK-NEXT: .float_mode: 0xc0
+; CHECK-NEXT: .forward_progress: true
; GFX11-NEXT: .ieee_mode: false
; CHECK-NEXT: .image_op: false
; CHECK-NEXT: .lds_size: 0
@@ -118,6 +119,7 @@
; CHECK-NEXT: .debug_mode: false
; CHECK-NOT: .entry_point: _amdgpu_gs_main
; CHECK-NEXT: .entry_point_symbol: gs_shader
+; CHECK-NEXT: .forward_progress: true
; GFX11-NEXT: .ieee_mode: false
; CHECK-NEXT: .lds_size: 0x200
; CHECK-NEXT: .mem_ordered: true
@@ -130,6 +132,7 @@
; CHECK-NEXT: .debug_mode: false
; CHECK-NOT: .entry_point: _amdgpu_hs_main
; CHECK-NEXT: .entry_point_symbol: hs_shader
+; CHECK-NEXT: .forward_progress: true
; GFX11-NEXT: .ieee_mode: false
; CHECK-NEXT: .lds_size: 0x1000
; CHECK-NEXT: .mem_ordered: true
@@ -142,6 +145,7 @@
; CHECK-NEXT: .debug_mode: false
; CHECK-NOT: .entry_point: _amdgpu_ps_main
; CHECK-NEXT: .entry_point_symbol: ps_shader
+; CHECK-NEXT: .forward_progress: true
; GFX11-NEXT: .ieee_mode: false
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .mem_ordered: true
More information about the llvm-commits mailing list