[llvm] [AMDGPU][GlobalISel] Fix assert on APInt creation. (PR #124608)
Daniil Fukalov via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 28 05:25:38 PST 2025
https://github.com/dfukalov updated https://github.com/llvm/llvm-project/pull/124608
>From 659517606b10f77e967d3ac5aca5c37585e315bd Mon Sep 17 00:00:00 2001
From: dfukalov <dfukalov at gmail.com>
Date: Mon, 27 Jan 2025 19:31:50 +0100
Subject: [PATCH 1/3] [AMDGPU][GlobalISel] Fix assert on APInt creation.
Since 3494ee95902cef62f767489802e469c58a13ea04, APInt no longer
implicitly truncates values, so it asserts when a large signed
value is implicitly converted to an unsigned APInt.
This change explicitly marks the offset as a signed value.
---
llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp | 2 +-
.../AMDGPU/GlobalISel/assert-signed-apint.ll | 14 ++++++++++++++
2 files changed, 15 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
index d64337c4cb9093..0b18c6b0e923a7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -56,7 +56,7 @@ AMDGPU::getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg,
Register Base;
if (KnownBits && mi_match(Reg, MRI, m_GOr(m_Reg(Base), m_ICst(Offset))) &&
- KnownBits->maskedValueIsZero(Base, APInt(32, Offset)))
+ KnownBits->maskedValueIsZero(Base, APInt(32, Offset, /*isSigned=*/true)))
return std::pair(Base, Offset);
// Handle G_PTRTOINT (G_PTR_ADD base, const) case
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll
new file mode 100644
index 00000000000000..33bdd67a42782e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll
@@ -0,0 +1,14 @@
+; REQUIRES: asserts
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=instruction-select -o - %s | FileCheck %s
+
+;
+
+; CHECK-LABEL: @test
+; CHECK: S_BUFFER_LOAD_DWORD_SGPR_IMM
+
+define amdgpu_cs void @test(<4 x i32> inreg %base, i32 inreg %i, ptr addrspace(1) inreg %out) {
+ %off = or i32 %i, -2147483648
+ %v = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %base, i32 %off, i32 0)
+ store i32 %v, ptr addrspace(1) %out, align 4
+ ret void
+}
>From 3023d8aa6b61ee0dd94720c51f711bb1992e64ac Mon Sep 17 00:00:00 2001
From: dfukalov <dfukalov at gmail.com>
Date: Mon, 27 Jan 2025 19:50:24 +0100
Subject: [PATCH 2/3] fixup! [AMDGPU][GlobalISel] Fix assert on APInt creation.
Add comment to test.
---
llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll
index 33bdd67a42782e..49344f7a0fd598 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll
@@ -1,7 +1,7 @@
; REQUIRES: asserts
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=instruction-select -o - %s | FileCheck %s
-;
+; Check that APInt doesn't assert on creation from -2147483648 value.
; CHECK-LABEL: @test
; CHECK: S_BUFFER_LOAD_DWORD_SGPR_IMM
>From 2ea9ccd0e934918f3f007483b749ad4a6975d439 Mon Sep 17 00:00:00 2001
From: dfukalov <dfukalov at gmail.com>
Date: Tue, 28 Jan 2025 14:25:22 +0100
Subject: [PATCH 3/3] fixup! fixup! [AMDGPU][GlobalISel] Fix assert on APInt
creation.
Addressed comments.
---
.../AMDGPU/GlobalISel/assert-signed-apint.ll | 14 ----
.../GlobalISel/llvm.amdgcn.s.buffer.load.ll | 69 +++++++++++++++++++
2 files changed, 69 insertions(+), 14 deletions(-)
delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll
deleted file mode 100644
index 49344f7a0fd598..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; REQUIRES: asserts
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=instruction-select -o - %s | FileCheck %s
-
-; Check that APInt doesn't assert on creation from -2147483648 value.
-
-; CHECK-LABEL: @test
-; CHECK: S_BUFFER_LOAD_DWORD_SGPR_IMM
-
-define amdgpu_cs void @test(<4 x i32> inreg %base, i32 inreg %i, ptr addrspace(1) inreg %out) {
- %off = or i32 %i, -2147483648
- %v = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %base, i32 %off, i32 0)
- store i32 %v, ptr addrspace(1) %out, align 4
- ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
index 91cde52cd2d67a..79b333c08cb2db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
@@ -6846,6 +6846,75 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr
ret float %val
}
+define amdgpu_ps float @s_buffer_load_f32_offset_or_vgpr_imm(<4 x i32> inreg %rsrc, i32 inreg %offset.s) {
+ ; GFX6-LABEL: name: s_buffer_load_f32_offset_or_vgpr_imm
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+ ; GFX6-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_OR_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ;
+ ; GFX7-LABEL: name: s_buffer_load_f32_offset_or_vgpr_imm
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+ ; GFX7-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_OR_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ;
+ ; GFX8-LABEL: name: s_buffer_load_f32_offset_or_vgpr_imm
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+ ; GFX8-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_OR_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ;
+ ; GFX12-LABEL: name: s_buffer_load_f32_offset_or_vgpr_imm
+ ; GFX12: bb.1 (%ir-block.0):
+ ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+ ; GFX12-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
+ ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_OR_B32_]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; GFX12-NEXT: $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR_IMM]]
+ ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
+ %offset = or i32 %offset.s, -2147483648
+ %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
+ ret float %val
+}
+
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg)
declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg)
More information about the llvm-commits
mailing list