[llvm-branch-commits] [llvm] AMDGPU: Fix inst-selection of large scratch offsets with sgpr base (PR #110256)
Petar Avramovic via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Sep 27 09:04:07 PDT 2024
https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/110256
>From 2ea25b291ffdae0d3b9b6821199080f133de34c7 Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic at amd.com>
Date: Fri, 27 Sep 2024 13:59:31 +0200
Subject: [PATCH] AMDGPU: Fix inst-selection of large scratch offsets with sgpr
base
Use i32 for offset instead of i16, this way it does not get interpreted
as negative 16 bit offset.
---
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 6 +++---
llvm/test/CodeGen/AMDGPU/flat-scratch.ll | 12 ++++++------
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index d3d5bc924525fc..ff8798edb3cc0f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1911,7 +1911,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
0);
}
- Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
+ Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i32);
return true;
}
@@ -1966,7 +1966,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
return false;
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
return false;
- Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
+ Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
return true;
}
}
@@ -1999,7 +1999,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
return false;
SAddr = SelectSAddrFI(CurDAG, SAddr);
- Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
+ Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index fd67dfc65f9846..ef9590b3fd33fa 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -4926,7 +4926,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa
;
; GFX12-LABEL: sgpr_base_large_offset:
; GFX12: ; %bb.0: ; %entry
-; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:-24
+; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:65512
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-NEXT: s_nop 0
@@ -4985,7 +4985,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa
;
; GFX12-PAL-LABEL: sgpr_base_large_offset:
; GFX12-PAL: ; %bb.0: ; %entry
-; GFX12-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:-24
+; GFX12-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:65512
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
; GFX12-PAL-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-PAL-NEXT: s_nop 0
@@ -5038,7 +5038,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: v_mov_b32_e32 v2, 0x1000000
; GFX12-NEXT: s_and_b32 s0, s0, -4
-; GFX12-NEXT: scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-NEXT: scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-NEXT: s_nop 0
@@ -5103,7 +5103,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a
; GFX12-PAL: ; %bb.0: ; %entry
; GFX12-PAL-NEXT: v_mov_b32_e32 v2, 0x1000000
; GFX12-PAL-NEXT: s_and_b32 s0, s0, -4
-; GFX12-PAL-NEXT: scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-PAL-NEXT: scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
; GFX12-PAL-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-PAL-NEXT: s_nop 0
@@ -5159,7 +5159,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_mov_b32_e32 v1, 15
; GFX12-NEXT: s_add_co_i32 s0, s0, s1
-; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: s_endpgm
;
@@ -5221,7 +5221,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
; GFX12-PAL: ; %bb.0: ; %bb
; GFX12-PAL-NEXT: v_mov_b32_e32 v1, 15
; GFX12-PAL-NEXT: s_add_co_i32 s0, s0, s1
-; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
; GFX12-PAL-NEXT: s_wait_storecnt 0x0
; GFX12-PAL-NEXT: s_endpgm
bb:
More information about the llvm-branch-commits
mailing list