[llvm-branch-commits] [llvm] 962edd3 - AMDGPU: Fix inst-selection of large scratch offsets with sgpr base (#110256)

Tobias Hieta via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Sep 30 23:57:06 PDT 2024


Author: Petar Avramovic
Date: 2024-10-01T08:56:50+02:00
New Revision: 962edd3f71eebdcd781222cdd97a561979894003

URL: https://github.com/llvm/llvm-project/commit/962edd3f71eebdcd781222cdd97a561979894003
DIFF: https://github.com/llvm/llvm-project/commit/962edd3f71eebdcd781222cdd97a561979894003.diff

LOG: AMDGPU: Fix inst-selection of large scratch offsets with sgpr base (#110256)

Use i32 for offset instead of i16, this way it does not get interpreted
as negative 16 bit offset.

(cherry picked from commit 83fe85115da9dc25fa270d2ea8140113c8d49670)

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
    llvm/test/CodeGen/AMDGPU/flat-scratch.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b7471bab128509..7b786ee2641721 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1911,7 +1911,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
                     0);
   }
 
-  Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
+  Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i32);
 
   return true;
 }
@@ -1967,7 +1967,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
           return false;
         if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
           return false;
-        Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
+        Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
         return true;
       }
     }
@@ -2000,7 +2000,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
   if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
     return false;
   SAddr = SelectSAddrFI(CurDAG, SAddr);
-  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
+  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
   return true;
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index f040b47428640a..284f1746145225 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -4956,7 +4956,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa
 ;
 ; GFX12-LABEL: sgpr_base_large_offset:
 ; GFX12:       ; %bb.0: ; %entry
-; GFX12-NEXT:    scratch_load_b32 v2, off, s0 offset:-24
+; GFX12-NEXT:    scratch_load_b32 v2, off, s0 offset:65512
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
 ; GFX12-NEXT:    s_nop 0
@@ -5015,7 +5015,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa
 ;
 ; GFX12-PAL-LABEL: sgpr_base_large_offset:
 ; GFX12-PAL:       ; %bb.0: ; %entry
-; GFX12-PAL-NEXT:    scratch_load_b32 v2, off, s0 offset:-24
+; GFX12-PAL-NEXT:    scratch_load_b32 v2, off, s0 offset:65512
 ; GFX12-PAL-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-PAL-NEXT:    global_store_b32 v[0:1], v2, off
 ; GFX12-PAL-NEXT:    s_nop 0
@@ -5068,7 +5068,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a
 ; GFX12:       ; %bb.0: ; %entry
 ; GFX12-NEXT:    v_mov_b32_e32 v2, 0x1000000
 ; GFX12-NEXT:    s_and_b32 s0, s0, -4
-; GFX12-NEXT:    scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-NEXT:    scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
 ; GFX12-NEXT:    s_nop 0
@@ -5133,7 +5133,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a
 ; GFX12-PAL:       ; %bb.0: ; %entry
 ; GFX12-PAL-NEXT:    v_mov_b32_e32 v2, 0x1000000
 ; GFX12-PAL-NEXT:    s_and_b32 s0, s0, -4
-; GFX12-PAL-NEXT:    scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-PAL-NEXT:    scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS
 ; GFX12-PAL-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-PAL-NEXT:    global_store_b32 v[0:1], v2, off
 ; GFX12-PAL-NEXT:    s_nop 0
@@ -5189,7 +5189,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    v_mov_b32_e32 v1, 15
 ; GFX12-NEXT:    s_add_co_i32 s0, s0, s1
-; GFX12-NEXT:    scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-NEXT:    scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
 ; GFX12-NEXT:    s_wait_storecnt 0x0
 ; GFX12-NEXT:    s_endpgm
 ;
@@ -5251,7 +5251,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
 ; GFX12-PAL:       ; %bb.0: ; %bb
 ; GFX12-PAL-NEXT:    v_mov_b32_e32 v1, 15
 ; GFX12-PAL-NEXT:    s_add_co_i32 s0, s0, s1
-; GFX12-PAL-NEXT:    scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-PAL-NEXT:    scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
 ; GFX12-PAL-NEXT:    s_wait_storecnt 0x0
 ; GFX12-PAL-NEXT:    s_endpgm
 bb:


        


More information about the llvm-branch-commits mailing list