[llvm] [AMDGPU] Add support for store to constant address space (PR #153835)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 15 10:11:35 PDT 2025
https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/153835
Since we don't treat stores to the constant address space as IR verifier errors, we need to support their lowering. This PR does so by treating such stores as no-ops: in the combiner, the store node is simply replaced with its chain.
Fixes SWDEV-499366.
>From deb7416725f7abf16cfa64582f8bef8d52953e7e Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Fri, 15 Aug 2025 13:10:32 -0400
Subject: [PATCH] [AMDGPU] Add support for store to constant address space
Since we don't treat stores to the constant address space as IR verifier errors, we need to support their lowering. This PR does so by treating such stores as no-ops: in the combiner, the store node is simply replaced with its chain.
Fixes SWDEV-499366.
---
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 7 +-
.../CodeGen/AMDGPU/store-to-constant-error.ll | 10 ---
llvm/test/CodeGen/AMDGPU/store-to-constant.ll | 77 +++++++++++++++++++
3 files changed, 82 insertions(+), 12 deletions(-)
delete mode 100644 llvm/test/CodeGen/AMDGPU/store-to-constant-error.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/store-to-constant.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 64e68ab7d753c..3f44559e07756 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3910,10 +3910,14 @@ SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
// type.
SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
+ StoreSDNode *SN = cast<StoreSDNode>(N);
+ unsigned AS = SN->getAddressSpace();
+ if (AMDGPU::isConstantAddressSpace(AS))
+ return SN->getChain();
+
if (!DCI.isBeforeLegalize())
return SDValue();
- StoreSDNode *SN = cast<StoreSDNode>(N);
if (!SN->isSimple() || !ISD::isNormalStore(SN))
return SDValue();
@@ -3925,7 +3929,6 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
Align Alignment = SN->getAlign();
if (Alignment < Size && isTypeLegal(VT)) {
unsigned IsFast;
- unsigned AS = SN->getAddressSpace();
// Expand unaligned stores earlier than legalization. Due to visitation
// order problems during legalization, the emitted instructions to pack and
diff --git a/llvm/test/CodeGen/AMDGPU/store-to-constant-error.ll b/llvm/test/CodeGen/AMDGPU/store-to-constant-error.ll
deleted file mode 100644
index 0bfc45c84b0c4..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/store-to-constant-error.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o /dev/null %s 2>&1 | FileCheck -check-prefix=SDAG %s
-; RUN: not llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o /dev/null %s 2>&1 | FileCheck -check-prefix=GISEL %s
-
-; SDAG: LLVM ERROR: Cannot select: {{[a-z0-9]+}}: ch = store<(store (s32) into %ir.ptr.load, addrspace 4)>
-; GISEL: LLVM ERROR: cannot select: G_STORE %{{[0-9]+}}:vgpr(s32), %{{[0-9]+}}:vgpr(p4) :: (store (s32) into %ir.ptr.load, addrspace 4) (in function: store_to_constant_i32)
-define amdgpu_kernel void @store_to_constant_i32(ptr addrspace(4) %ptr) {
-bb:
- store i32 1, ptr addrspace(4) %ptr, align 4
- ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/store-to-constant.ll b/llvm/test/CodeGen/AMDGPU/store-to-constant.ll
new file mode 100644
index 0000000000000..77afbf8355f2b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/store-to-constant.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s -o - | FileCheck %s
+
+define amdgpu_kernel void @store_as4(ptr addrspace(4) %out, i32 %a, i32 %b) {
+; CHECK-LABEL: store_as4:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_endpgm
+ %r = add i32 %a, %b
+ store i32 %r, ptr addrspace(4) %out
+ ret void
+}
+
+define amdgpu_kernel void @memset_as4(ptr addrspace(4) %dst) {
+; CHECK-LABEL: memset_as4:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_endpgm
+ call void @llvm.memset.p4.i64(ptr addrspace(4) %dst, i8 0, i64 256, i1 false)
+ ret void
+}
+
+define amdgpu_kernel void @memcpy_to_as4(ptr addrspace(4) %dst, ptr %src) {
+; CHECK-LABEL: memcpy_to_as4:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_endpgm
+ call void @llvm.memcpy.p4.p0.i32(ptr addrspace(4) %dst, ptr %src, i32 256, i1 false)
+ ret void
+}
+
+define amdgpu_kernel void @store_as6(ptr addrspace(6) %out, i32 %a, i32 %b) {
+; CHECK-LABEL: store_as6:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_endpgm
+ %r = add i32 %a, %b
+ store i32 %r, ptr addrspace(6) %out
+ ret void
+}
+
+define amdgpu_kernel void @memset_as6(ptr addrspace(6) %dst) {
+; CHECK-LABEL: memset_as6:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_endpgm
+ call void @llvm.memset.p6.i64(ptr addrspace(6) %dst, i8 0, i64 256, i1 false)
+ ret void
+}
+
+define amdgpu_kernel void @memcpy_to_as6(ptr addrspace(6) %dst, ptr %src) {
+; CHECK-LABEL: memcpy_to_as6:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_endpgm
+ call void @llvm.memcpy.p6.p0.i32(ptr addrspace(6) %dst, ptr %src, i32 256, i1 false)
+ ret void
+}
+
+; define amdgpu_kernel void @cmpxchg_to_as4(ptr addrspace(4) %dst, i32 %src) {
+; %void = cmpxchg ptr addrspace(4) %dst, i32 0, i32 %src seq_cst monotonic
+; ret void
+; }
+
+; define amdgpu_kernel void @atomicrmw_to_as4(ptr addrspace(4) %dst, i32 %src) {
+; %void = atomicrmw add ptr addrspace(4) %dst, i32 %src acquire
+; ret void
+; }
+
+; define amdgpu_kernel void @cmpxchg_to_as6(ptr addrspace(6) %dst, i32 %src) {
+; %void = cmpxchg ptr addrspace(6) %dst, i32 0, i32 %src seq_cst monotonic
+; ret void
+; }
+
+; define amdgpu_kernel void @atomicrmw_to_as6(ptr addrspace(6) %dst, i32 %src) {
+; %void = atomicrmw add ptr addrspace(6) %dst, i32 %src acquire
+; ret void
+; }
+
+declare void @llvm.memset.p4.i64(ptr addrspace(4) noalias nocapture writeonly, i8, i64, i1)
+declare void @llvm.memset.p6.i64(ptr addrspace(6) noalias nocapture writeonly, i8, i64, i1)
+declare void @llvm.memcpy.p4.p0.i32(ptr addrspace(4) noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1)
+declare void @llvm.memcpy.p6.p0.i32(ptr addrspace(6) noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1)
More information about the llvm-commits
mailing list