[llvm] [AMDGPU][GlobalISel] Fix pointer type handling in instruction selection (PR #181842)
Romanov Vlad via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 08:15:16 PST 2026
https://github.com/romanovvlad updated https://github.com/llvm/llvm-project/pull/181842
>From f955ac9a3b7c319b188e599abcae98809932b670 Mon Sep 17 00:00:00 2001
From: Vlad <Vladislav.Romanov at amd.com>
Date: Tue, 17 Feb 2026 07:59:44 -0600
Subject: [PATCH 1/2] [AMDGPU][GlobalISel] Fix pointer type handling in
instruction selection
getBaseWithConstantOffset can return pointer-typed registers when
analyzing address computations. selectSMRDBufferSgprImm was
assuming integer types, causing assertion failures with pointers.
This could happen for sequences like:
%ptr = getelementptr i8, ptr addrspace(6) %base, i32 %idx
%ptr2 = getelementptr i8, ptr addrspace(6) %ptr, i64 16
%offset = ptrtoint ptr addrspace(6) %ptr2 to i32
%val = call i32 @llvm.amdgcn.s.buffer.load.i32(.., i32 %offset, ..)
The fix is to check if getBaseWithConstantOffset returns a pointer type
and bail out if it does.
---
.../AMDGPU/AMDGPUInstructionSelector.cpp | 8 ++
.../GlobalISel/llvm.amdgcn.s.buffer.load.ll | 84 +++++++++++++++++++
2 files changed, 92 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 82783dc95b2ab..cc2593ff9a20e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -6884,6 +6884,14 @@ AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
if (!SOffset)
return std::nullopt;
+ // AMDGPU::getBaseWithConstantOffset may return a pointer type for sequences
+ // like: G_PTRTOINT (G_PTR_ADD (G_INTTOPTR(base), const)) while SMRD
+ // instructions require integer offsets in scalar registers. A PTRTOINT could
+ // be inserted here, but there is not enough info to build instructions in
+ // this context.
+ if (MRI->getType(SOffset).isPointer())
+ return std::nullopt;
+
std::optional<int64_t> EncodedOffset =
AMDGPU::getSMRDEncodedOffset(STI, Offset, /* IsBuffer */ true);
if (!EncodedOffset)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
index 7de6b6649dab5..84fd8fa3b1071 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
@@ -8143,6 +8143,90 @@ define amdgpu_ps float @s_buffer_load_f32_offset_or_vgpr_imm(<4 x i32> inreg %rs
ret float %val
}
+define amdgpu_gs i32 @s_buffer_load_pointer_derived_offset(i32 inreg %offset.base) {
+ ; GFX6-LABEL: name: s_buffer_load_pointer_derived_offset
+ ; GFX6: bb.1.entry:
+ ; GFX6-NEXT: liveins: $sgpr2
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; GFX6-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[S_MOV_B32_1]], implicit-def dead $scc
+ ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+ ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_MOV_B32_2]], [[S_LSHL_B32_]], implicit-def dead $scc
+ ; GFX6-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[S_MOV_B32_2]], implicit-def dead $scc
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_ADD_U32_1]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ;
+ ; GFX7-LABEL: name: s_buffer_load_pointer_derived_offset
+ ; GFX7: bb.1.entry:
+ ; GFX7-NEXT: liveins: $sgpr2
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; GFX7-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[S_MOV_B32_1]], implicit-def dead $scc
+ ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+ ; GFX7-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_MOV_B32_2]], [[S_LSHL_B32_]], implicit-def dead $scc
+ ; GFX7-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[S_MOV_B32_2]], implicit-def dead $scc
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_ADD_U32_1]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ;
+ ; GFX8-LABEL: name: s_buffer_load_pointer_derived_offset
+ ; GFX8: bb.1.entry:
+ ; GFX8-NEXT: liveins: $sgpr2
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
+ ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; GFX8-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[S_MOV_B32_1]], implicit-def dead $scc
+ ; GFX8-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+ ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_MOV_B32_2]], [[S_LSHL_B32_]], implicit-def dead $scc
+ ; GFX8-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[S_MOV_B32_2]], implicit-def dead $scc
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_ADD_U32_1]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ;
+ ; GFX1200_1250-LABEL: name: s_buffer_load_pointer_derived_offset
+ ; GFX1200_1250: bb.1.entry:
+ ; GFX1200_1250-NEXT: liveins: $sgpr2
+ ; GFX1200_1250-NEXT: {{ $}}
+ ; GFX1200_1250-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX1200_1250-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX1200_1250-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
+ ; GFX1200_1250-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; GFX1200_1250-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[S_MOV_B32_1]], implicit-def dead $scc
+ ; GFX1200_1250-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+ ; GFX1200_1250-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_MOV_B32_2]], [[S_LSHL_B32_]], implicit-def dead $scc
+ ; GFX1200_1250-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[S_MOV_B32_2]], implicit-def dead $scc
+ ; GFX1200_1250-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_ADD_U32_1]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; GFX1200_1250-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR_IMM]]
+ ; GFX1200_1250-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX1200_1250-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX1200_1250-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
+entry:
+ %offset.i64 = sext i32 %offset.base to i64
+ %base.ptr = getelementptr i32, ptr addrspace(6) inttoptr (i64 16 to ptr addrspace(6)), i64 %offset.i64
+ %final.ptr = getelementptr i8, ptr addrspace(6) %base.ptr, i64 16
+
+ %ptr.as.int = ptrtoint ptr addrspace(6) %final.ptr to i64
+ %offset = trunc i64 %ptr.as.int to i32
+
+ %res = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> zeroinitializer, i32 %offset, i32 0)
+ ret i32 %res
+}
+
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg)
declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg)
>From 1d069184616544185e33ca5789e5bb5943a0e0ab Mon Sep 17 00:00:00 2001
From: Vlad <Vladislav.Romanov at amd.com>
Date: Wed, 4 Mar 2026 09:56:41 -0600
Subject: [PATCH 2/2] Just check for size
---
.../AMDGPU/AMDGPUInstructionSelector.cpp | 10 +--
.../GlobalISel/llvm.amdgcn.s.buffer.load.ll | 84 -------------------
.../smrd-buffer-sgpr-imm-s64-assert.mir | 36 ++++++++
3 files changed, 37 insertions(+), 93 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/smrd-buffer-sgpr-imm-s64-assert.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index cc2593ff9a20e..00704991db248 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -6881,15 +6881,7 @@ AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
unsigned Offset;
std::tie(SOffset, Offset) = AMDGPU::getBaseWithConstantOffset(
*MRI, Root.getReg(), VT, /*CheckNUW*/ true);
- if (!SOffset)
- return std::nullopt;
-
- // AMDGPU::getBaseWithConstantOffset may return a pointer type for sequences
- // like: G_PTRTOINT (G_PTR_ADD (G_INTTOPTR(base), const)) while SMRD
- // instructions require integer offsets in scalar registers. A PTRTOINT could
- // be inserted here, but there is not enough info to build instructions in
- // this context.
- if (MRI->getType(SOffset).isPointer())
+ if (!SOffset || MRI->getType(SOffset).getSizeInBits() != 32)
return std::nullopt;
std::optional<int64_t> EncodedOffset =
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
index 84fd8fa3b1071..7de6b6649dab5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
@@ -8143,90 +8143,6 @@ define amdgpu_ps float @s_buffer_load_f32_offset_or_vgpr_imm(<4 x i32> inreg %rs
ret float %val
}
-define amdgpu_gs i32 @s_buffer_load_pointer_derived_offset(i32 inreg %offset.base) {
- ; GFX6-LABEL: name: s_buffer_load_pointer_derived_offset
- ; GFX6: bb.1.entry:
- ; GFX6-NEXT: liveins: $sgpr2
- ; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
- ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2
- ; GFX6-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[S_MOV_B32_1]], implicit-def dead $scc
- ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16
- ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_MOV_B32_2]], [[S_LSHL_B32_]], implicit-def dead $scc
- ; GFX6-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[S_MOV_B32_2]], implicit-def dead $scc
- ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_ADD_U32_1]], 0 :: (dereferenceable invariant load (s32))
- ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
- ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
- ;
- ; GFX7-LABEL: name: s_buffer_load_pointer_derived_offset
- ; GFX7: bb.1.entry:
- ; GFX7-NEXT: liveins: $sgpr2
- ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
- ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2
- ; GFX7-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[S_MOV_B32_1]], implicit-def dead $scc
- ; GFX7-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16
- ; GFX7-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_MOV_B32_2]], [[S_LSHL_B32_]], implicit-def dead $scc
- ; GFX7-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[S_MOV_B32_2]], implicit-def dead $scc
- ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_ADD_U32_1]], 0 :: (dereferenceable invariant load (s32))
- ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
- ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
- ;
- ; GFX8-LABEL: name: s_buffer_load_pointer_derived_offset
- ; GFX8: bb.1.entry:
- ; GFX8-NEXT: liveins: $sgpr2
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
- ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2
- ; GFX8-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[S_MOV_B32_1]], implicit-def dead $scc
- ; GFX8-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16
- ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_MOV_B32_2]], [[S_LSHL_B32_]], implicit-def dead $scc
- ; GFX8-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[S_MOV_B32_2]], implicit-def dead $scc
- ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_ADD_U32_1]], 0 :: (dereferenceable invariant load (s32))
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
- ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
- ;
- ; GFX1200_1250-LABEL: name: s_buffer_load_pointer_derived_offset
- ; GFX1200_1250: bb.1.entry:
- ; GFX1200_1250-NEXT: liveins: $sgpr2
- ; GFX1200_1250-NEXT: {{ $}}
- ; GFX1200_1250-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX1200_1250-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX1200_1250-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
- ; GFX1200_1250-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2
- ; GFX1200_1250-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[S_MOV_B32_1]], implicit-def dead $scc
- ; GFX1200_1250-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16
- ; GFX1200_1250-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_MOV_B32_2]], [[S_LSHL_B32_]], implicit-def dead $scc
- ; GFX1200_1250-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[S_MOV_B32_2]], implicit-def dead $scc
- ; GFX1200_1250-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_ADD_U32_1]], 0, 0 :: (dereferenceable invariant load (s32))
- ; GFX1200_1250-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR_IMM]]
- ; GFX1200_1250-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
- ; GFX1200_1250-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX1200_1250-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
-entry:
- %offset.i64 = sext i32 %offset.base to i64
- %base.ptr = getelementptr i32, ptr addrspace(6) inttoptr (i64 16 to ptr addrspace(6)), i64 %offset.i64
- %final.ptr = getelementptr i8, ptr addrspace(6) %base.ptr, i64 16
-
- %ptr.as.int = ptrtoint ptr addrspace(6) %final.ptr to i64
- %offset = trunc i64 %ptr.as.int to i32
-
- %res = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> zeroinitializer, i32 %offset, i32 0)
- ret i32 %res
-}
-
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg)
declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd-buffer-sgpr-imm-s64-assert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd-buffer-sgpr-imm-s64-assert.mir
new file mode 100644
index 0000000000000..79ad8741bcba4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd-buffer-sgpr-imm-s64-assert.mir
@@ -0,0 +1,36 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass=instruction-select -disable-gisel-legality-check -o - %s | FileCheck %s
+#
+# Test that selectSMRDBufferSgprImm correctly handles 64-bit types returned
+# by getBaseWithConstantOffset when looking through
+# G_PTRTOINT(G_PTR_ADD(G_INTTOPTR(s64), const))
+
+---
+name: smrd_buffer_sgpr_imm_s64_offset
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5
+
+ ; CHECK-LABEL: name: smrd_buffer_sgpr_imm_s64_offset
+ ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]]
+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+ ; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[COPY2]], [[S_ADD_U32_]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_BUFFER_LOAD_DWORD_SGPR_IMM]]
+ %0:sgpr(s64) = COPY $sgpr4_sgpr5
+ %1:sgpr(p6) = G_INTTOPTR %0(s64)
+ %2:sgpr(s32) = G_CONSTANT i32 16
+ %3:sgpr(p6) = G_PTR_ADD %1, %2(s32)
+ %4:sgpr(s32) = G_PTRTOINT %3(p6)
+
+ %5:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %6:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD %5, %4, 0 :: (dereferenceable invariant load (s32))
+ S_ENDPGM 0, implicit %6(s32)
+...
More information about the llvm-commits
mailing list