[llvm] [AMDGPU][GlobalISel] Fix pointer type handling in instruction selection (PR #181842)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 3 00:44:36 PST 2026


================
@@ -8143,6 +8143,90 @@ define amdgpu_ps float @s_buffer_load_f32_offset_or_vgpr_imm(<4 x i32> inreg %rs
   ret float %val
 }
 
+define amdgpu_gs i32 @s_buffer_load_pointer_derived_offset(i32 inreg %offset.base) {
+  ; GFX6-LABEL: name: s_buffer_load_pointer_derived_offset
+  ; GFX6: bb.1.entry:
+  ; GFX6-NEXT:   liveins: $sgpr2
+  ; GFX6-NEXT: {{  $}}
+  ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
+  ; GFX6-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+  ; GFX6-NEXT:   [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[S_MOV_B32_1]], implicit-def dead $scc
+  ; GFX6-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+  ; GFX6-NEXT:   [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_MOV_B32_2]], [[S_LSHL_B32_]], implicit-def dead $scc
+  ; GFX6-NEXT:   [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[S_MOV_B32_2]], implicit-def dead $scc
+  ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_ADD_U32_1]], 0 :: (dereferenceable invariant load (s32))
+  ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+  ; GFX6-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; GFX6-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+  ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
+  ;
+  ; GFX7-LABEL: name: s_buffer_load_pointer_derived_offset
+  ; GFX7: bb.1.entry:
+  ; GFX7-NEXT:   liveins: $sgpr2
+  ; GFX7-NEXT: {{  $}}
+  ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
+  ; GFX7-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+  ; GFX7-NEXT:   [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[S_MOV_B32_1]], implicit-def dead $scc
+  ; GFX7-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+  ; GFX7-NEXT:   [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_MOV_B32_2]], [[S_LSHL_B32_]], implicit-def dead $scc
+  ; GFX7-NEXT:   [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[S_MOV_B32_2]], implicit-def dead $scc
+  ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_ADD_U32_1]], 0 :: (dereferenceable invariant load (s32))
+  ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+  ; GFX7-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; GFX7-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+  ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
+  ;
+  ; GFX8-LABEL: name: s_buffer_load_pointer_derived_offset
+  ; GFX8: bb.1.entry:
+  ; GFX8-NEXT:   liveins: $sgpr2
+  ; GFX8-NEXT: {{  $}}
+  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
+  ; GFX8-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+  ; GFX8-NEXT:   [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[S_MOV_B32_1]], implicit-def dead $scc
+  ; GFX8-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+  ; GFX8-NEXT:   [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_MOV_B32_2]], [[S_LSHL_B32_]], implicit-def dead $scc
+  ; GFX8-NEXT:   [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[S_MOV_B32_2]], implicit-def dead $scc
+  ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_ADD_U32_1]], 0 :: (dereferenceable invariant load (s32))
+  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+  ; GFX8-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; GFX8-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+  ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
+  ;
+  ; GFX1200_1250-LABEL: name: s_buffer_load_pointer_derived_offset
+  ; GFX1200_1250: bb.1.entry:
+  ; GFX1200_1250-NEXT:   liveins: $sgpr2
+  ; GFX1200_1250-NEXT: {{  $}}
+  ; GFX1200_1250-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX1200_1250-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; GFX1200_1250-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
+  ; GFX1200_1250-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+  ; GFX1200_1250-NEXT:   [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[S_MOV_B32_1]], implicit-def dead $scc
+  ; GFX1200_1250-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+  ; GFX1200_1250-NEXT:   [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_MOV_B32_2]], [[S_LSHL_B32_]], implicit-def dead $scc
+  ; GFX1200_1250-NEXT:   [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[S_MOV_B32_2]], implicit-def dead $scc
+  ; GFX1200_1250-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_ADD_U32_1]], 0, 0 :: (dereferenceable invariant load (s32))
+  ; GFX1200_1250-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR_IMM]]
+  ; GFX1200_1250-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+  ; GFX1200_1250-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+  ; GFX1200_1250-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
+entry:
+  %offset.i64 = sext i32 %offset.base to i64
+  %base.ptr = getelementptr i32, ptr addrspace(6) inttoptr (i64 16 to ptr addrspace(6)), i64 %offset.i64
+  %final.ptr = getelementptr i8, ptr addrspace(6) %base.ptr, i64 16
+
+  %ptr.as.int = ptrtoint ptr addrspace(6) %final.ptr to i64
+  %offset = trunc i64 %ptr.as.int to i32
----------------
arsenm wrote:

I'm not sure which assert you mean. But yes, I think this should just go by bit sizes for selection purposes.

https://github.com/llvm/llvm-project/pull/181842


More information about the llvm-commits mailing list