[llvm] [SelectionDAG][AMDGPU] Negative offset when selecting scratch sv offsets (PR #122251)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 9 03:22:03 PST 2025
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/122251
>From 8683ff0882c2019dd2a4c6d2c509a59d054d92d6 Mon Sep 17 00:00:00 2001
From: jofrn <jofernau at amd.com>
Date: Thu, 9 Jan 2025 05:37:40 -0500
Subject: [PATCH] AMDGPU: Use isSigned for ImmOffset in SelectScratchSVAddr
A negative offset will trigger an assertion
in checkFlatScratchSVSSwizzleBug.
This change modifies it to use isSigned parameter of APInt.
---
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 3 +-
llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll | 80 +++++++++++++++++++
2 files changed, 82 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 27e9018d68a03e..7c1d92504da28e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1926,7 +1926,8 @@ bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
KnownBits VKnown = CurDAG->computeKnownBits(VAddr);
KnownBits SKnown =
KnownBits::add(CurDAG->computeKnownBits(SAddr),
- KnownBits::makeConstant(APInt(32, ImmOffset)));
+ KnownBits::makeConstant(APInt(32, ImmOffset,
+ /*isSigned=*/ true)));
uint64_t VMax = VKnown.getMaxValue().getZExtValue();
uint64_t SMax = SKnown.getMaxValue().getZExtValue();
return (VMax & 3) + (SMax & 3) >= 4;
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
index 066c04b1af088d..77ae03f0d3c324 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
@@ -1243,3 +1243,83 @@ bb:
store volatile i8 4, ptr addrspace(5) %p4
ret void
}
+
+define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) {
+; GFX940-SDAG-LABEL: soff1_voff1_negative:
+; GFX940-SDAG: ; %bb.0: ; %bb
+; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24
+; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
+; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-SDAG-NEXT: v_add_u32_e32 v0, s0, v0
+; GFX940-SDAG-NEXT: v_add_u32_e32 v0, -1, v0
+; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1
+; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT: s_endpgm
+;
+; GFX940-GISEL-LABEL: soff1_voff1_negative:
+; GFX940-GISEL: ; %bb.0: ; %bb
+; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24
+; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1
+; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0
+; GFX940-GISEL-NEXT: v_add3_u32 v0, s0, v0, -1
+; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1
+; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: soff1_voff1_negative:
+; GFX11-SDAG: ; %bb.0: ; %bb
+; GFX11-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_add3_u32 v0, 0, s0, v0
+; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:-1 dlc
+; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: soff1_voff1_negative:
+; GFX11-GISEL: ; %bb.0: ; %bb
+; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
+; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
+; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:-1 dlc
+; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-GISEL-NEXT: s_endpgm
+;
+; GFX12-SDAG-LABEL: soff1_voff1_negative:
+; GFX12-SDAG: ; %bb.0: ; %bb
+; GFX12-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:-1 scope:SCOPE_SYS
+; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
+; GFX12-SDAG-NEXT: s_endpgm
+;
+; GFX12-GISEL-LABEL: soff1_voff1_negative:
+; GFX12-GISEL: ; %bb.0: ; %bb
+; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0
+; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
+; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:-1 scope:SCOPE_SYS
+; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
+; GFX12-GISEL-NEXT: s_endpgm
+bb:
+ %soff1 = mul i32 %soff, 1
+ %a = alloca i8, i32 64, align 4, addrspace(5)
+ %as = getelementptr i8, ptr addrspace(5) %a, i32 %soff1
+ %voff = call i32 @llvm.amdgcn.workitem.id.x()
+ %voff1 = mul i32 %voff, 1
+ %asv = getelementptr i8, ptr addrspace(5) %as, i32 %voff1
+ %p1 = getelementptr i8, ptr addrspace(5) %asv, i32 -1
+ store volatile i8 1, ptr addrspace(5) %p1
+ ret void
+}
More information about the llvm-commits
mailing list