[llvm] [AMDGPU] Add support for store to constant address space (PR #153835)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 15 10:12:13 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Shilei Tian (shiltian)
<details>
<summary>Changes</summary>
Since we don't treat stores to the constant address space as IR verifier errors, we need to support their lowering. This PR supports that by treating such stores as no-ops: in the combiner, the store node is simply replaced with its chain.
Fixes SWDEV-499366.
---
Full diff: https://github.com/llvm/llvm-project/pull/153835.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (+5-2)
- (removed) llvm/test/CodeGen/AMDGPU/store-to-constant-error.ll (-10)
- (added) llvm/test/CodeGen/AMDGPU/store-to-constant.ll (+77)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 64e68ab7d753c..3f44559e07756 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3910,10 +3910,14 @@ SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
// type.
SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
+ StoreSDNode *SN = cast<StoreSDNode>(N);
+ unsigned AS = SN->getAddressSpace();
+ if (AMDGPU::isConstantAddressSpace(AS))
+ return SN->getChain();
+
if (!DCI.isBeforeLegalize())
return SDValue();
- StoreSDNode *SN = cast<StoreSDNode>(N);
if (!SN->isSimple() || !ISD::isNormalStore(SN))
return SDValue();
@@ -3925,7 +3929,6 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
Align Alignment = SN->getAlign();
if (Alignment < Size && isTypeLegal(VT)) {
unsigned IsFast;
- unsigned AS = SN->getAddressSpace();
// Expand unaligned stores earlier than legalization. Due to visitation
// order problems during legalization, the emitted instructions to pack and
diff --git a/llvm/test/CodeGen/AMDGPU/store-to-constant-error.ll b/llvm/test/CodeGen/AMDGPU/store-to-constant-error.ll
deleted file mode 100644
index 0bfc45c84b0c4..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/store-to-constant-error.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o /dev/null %s 2>&1 | FileCheck -check-prefix=SDAG %s
-; RUN: not llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o /dev/null %s 2>&1 | FileCheck -check-prefix=GISEL %s
-
-; SDAG: LLVM ERROR: Cannot select: {{[a-z0-9]+}}: ch = store<(store (s32) into %ir.ptr.load, addrspace 4)>
-; GISEL: LLVM ERROR: cannot select: G_STORE %{{[0-9]+}}:vgpr(s32), %{{[0-9]+}}:vgpr(p4) :: (store (s32) into %ir.ptr.load, addrspace 4) (in function: store_to_constant_i32)
-define amdgpu_kernel void @store_to_constant_i32(ptr addrspace(4) %ptr) {
-bb:
- store i32 1, ptr addrspace(4) %ptr, align 4
- ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/store-to-constant.ll b/llvm/test/CodeGen/AMDGPU/store-to-constant.ll
new file mode 100644
index 0000000000000..77afbf8355f2b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/store-to-constant.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s -o - | FileCheck %s
+
+define amdgpu_kernel void @store_as4(ptr addrspace(4) %out, i32 %a, i32 %b) {
+; CHECK-LABEL: store_as4:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_endpgm
+ %r = add i32 %a, %b
+ store i32 %r, ptr addrspace(4) %out
+ ret void
+}
+
+define amdgpu_kernel void @memset_as4(ptr addrspace(4) %dst) {
+; CHECK-LABEL: memset_as4:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_endpgm
+ call void @llvm.memset.p4.i64(ptr addrspace(4) %dst, i8 0, i64 256, i1 false)
+ ret void
+}
+
+define amdgpu_kernel void @memcpy_to_as4(ptr addrspace(4) %dst, ptr %src) {
+; CHECK-LABEL: memcpy_to_as4:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_endpgm
+ call void @llvm.memcpy.p4.p0.i32(ptr addrspace(4) %dst, ptr %src, i32 256, i1 false)
+ ret void
+}
+
+define amdgpu_kernel void @store_as6(ptr addrspace(6) %out, i32 %a, i32 %b) {
+; CHECK-LABEL: store_as6:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_endpgm
+ %r = add i32 %a, %b
+ store i32 %r, ptr addrspace(6) %out
+ ret void
+}
+
+define amdgpu_kernel void @memset_as6(ptr addrspace(6) %dst) {
+; CHECK-LABEL: memset_as6:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_endpgm
+ call void @llvm.memset.p6.i64(ptr addrspace(6) %dst, i8 0, i64 256, i1 false)
+ ret void
+}
+
+define amdgpu_kernel void @memcpy_to_as6(ptr addrspace(6) %dst, ptr %src) {
+; CHECK-LABEL: memcpy_to_as6:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_endpgm
+ call void @llvm.memcpy.p6.p0.i32(ptr addrspace(6) %dst, ptr %src, i32 256, i1 false)
+ ret void
+}
+
+; define amdgpu_kernel void @cmpxchg_to_as4(ptr addrspace(4) %dst, i32 %src) {
+; %void = cmpxchg ptr addrspace(4) %dst, i32 0, i32 %src seq_cst monotonic
+; ret void
+; }
+
+; define amdgpu_kernel void @atomicrmw_to_as4(ptr addrspace(4) %dst, i32 %src) {
+; %void = atomicrmw add ptr addrspace(4) %dst, i32 %src acquire
+; ret void
+; }
+
+; define amdgpu_kernel void @cmpxchg_to_as6(ptr addrspace(6) %dst, i32 %src) {
+; %void = cmpxchg ptr addrspace(6) %dst, i32 0, i32 %src seq_cst monotonic
+; ret void
+; }
+
+; define amdgpu_kernel void @atomicrmw_to_as6(ptr addrspace(6) %dst, i32 %src) {
+; %void = atomicrmw add ptr addrspace(6) %dst, i32 %src acquire
+; ret void
+; }
+
+declare void @llvm.memset.p4.i64(ptr addrspace(4) noalias nocapture writeonly, i8, i64, i1)
+declare void @llvm.memset.p6.i64(ptr addrspace(6) noalias nocapture writeonly, i8, i64, i1)
+declare void @llvm.memcpy.p4.p0.i32(ptr addrspace(4) noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1)
+declare void @llvm.memcpy.p6.p0.i32(ptr addrspace(6) noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1)
``````````
</details>
https://github.com/llvm/llvm-project/pull/153835
More information about the llvm-commits
mailing list