[llvm] [AMDGPU] Add gfx1250 memory lealizer tests run lines. NFC (PR #160586)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 24 12:25:53 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Stanislav Mekhanoshin (rampitec)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/160586.diff
3 Files Affected:
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll (+48)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll (+62)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll (+53)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
index bc905fa564f8a..80ea48be0b893 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @private_last_use_load_0(ptr addrspace(5) %in, ptr addrspace(1) %out) {
; GFX12-LABEL: private_last_use_load_0:
@@ -13,6 +14,17 @@ define amdgpu_kernel void @private_last_use_load_0(ptr addrspace(5) %in, ptr add
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
+;
+; GFX1250-LABEL: private_last_use_load_0:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: scratch_load_b32 v1, off, s2 th:TH_LOAD_LU
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
entry:
%val = load i32, ptr addrspace(5) %in, align 4, !amdgpu.last.use !{}
store i32 %val, ptr addrspace(1) %out
@@ -36,6 +48,20 @@ define amdgpu_kernel void @private_last_use_load_1(ptr addrspace(5) %in, ptr add
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
+;
+; GFX1250-LABEL: private_last_use_load_1:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_mov_b32 s3, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v1, v1, s3
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: scratch_load_b32 v1, v1, s2 scale_offset th:TH_LOAD_LU
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%val.gep = getelementptr inbounds i32, ptr addrspace(5) %in, i32 %tid
@@ -57,6 +83,17 @@ define amdgpu_kernel void @private_last_use_and_volatile_load(ptr addrspace(5) %
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
+;
+; GFX1250-LABEL: private_last_use_and_volatile_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: scratch_load_b32 v1, off, s2 th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
entry:
%val = load volatile i32, ptr addrspace(5) %in, align 4, !amdgpu.last.use !{}
store i32 %val, ptr addrspace(1) %out
@@ -74,6 +111,17 @@ define amdgpu_kernel void @private_last_use_and_nontemporal_load(ptr addrspace(5
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
+;
+; GFX1250-LABEL: private_last_use_and_nontemporal_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: scratch_load_b32 v1, off, s2 th:TH_LOAD_LU
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
entry:
%val = load i32, ptr addrspace(5) %in, align 4, !amdgpu.last.use !{}, !nontemporal !0
store i32 %val, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
index 2aa4f021c259c..89de17ecbd1e8 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
@@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @private_nontemporal_load_0(
; GFX6-LABEL: private_nontemporal_load_0:
@@ -201,6 +202,17 @@ define amdgpu_kernel void @private_nontemporal_load_0(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-LABEL: private_nontemporal_load_0:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: scratch_load_b32 v1, off, s2 th:TH_LOAD_NT
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %in, ptr addrspace(1) %out) {
entry:
%val = load i32, ptr addrspace(5) %in, align 4, !nontemporal !0
@@ -450,6 +462,20 @@ define amdgpu_kernel void @private_nontemporal_load_1(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-LABEL: private_nontemporal_load_1:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_mov_b32 s3, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v1, v1, s3
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: scratch_load_b32 v1, v1, s2 scale_offset th:TH_LOAD_NT
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -627,6 +653,17 @@ define amdgpu_kernel void @private_nontemporal_store_0(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: scratch_store_b32 off, v0, s0 th:TH_STORE_NT
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-LABEL: private_nontemporal_store_0:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[2:3], 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: scratch_store_b32 off, v0, s0 th:TH_STORE_NT
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(5) %out) {
entry:
%val = load i32, ptr addrspace(1) %in, align 4
@@ -846,6 +883,20 @@ define amdgpu_kernel void @private_nontemporal_store_1(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: scratch_store_b32 v1, v0, s0 th:TH_STORE_NT
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-LABEL: private_nontemporal_store_1:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[2:3], 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_mov_b32 s2, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v1, v0, s2
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: scratch_store_b32 v1, v0, s0 scale_offset th:TH_STORE_NT
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(5) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1047,6 +1098,17 @@ define amdgpu_kernel void @private_nontemporal_volatile_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-LABEL: private_nontemporal_volatile_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: scratch_load_b32 v1, off, s2 th:TH_LOAD_NT scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %in, ptr addrspace(1) %out) {
entry:
%val = load volatile i32, ptr addrspace(5) %in, align 4, !nontemporal !0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll
index df4193969f8a0..7faa0621aa6d0 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll
@@ -8,6 +8,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @private_volatile_load_0(
; GFX6-LABEL: private_volatile_load_0:
@@ -155,6 +156,17 @@ define amdgpu_kernel void @private_volatile_load_0(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-LABEL: private_volatile_load_0:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: scratch_load_b32 v1, off, s2 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %in, ptr addrspace(1) %out) {
entry:
%val = load volatile i32, ptr addrspace(5) %in, align 4
@@ -340,6 +352,20 @@ define amdgpu_kernel void @private_volatile_load_1(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-LABEL: private_volatile_load_1:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_mov_b32 s3, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v1, v1, s3
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: scratch_load_b32 v1, v1, s2 scale_offset scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -490,6 +516,18 @@ define amdgpu_kernel void @private_volatile_store_0(
; GFX12-CU-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-LABEL: private_volatile_store_0:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[2:3], 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(5) %out) {
entry:
%val = load i32, ptr addrspace(1) %in, align 4
@@ -664,6 +702,21 @@ define amdgpu_kernel void @private_volatile_store_1(
; GFX12-CU-NEXT: scratch_store_b32 v1, v0, s0 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-LABEL: private_volatile_store_1:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[2:3], 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_mov_b32 s2, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v1, v0, s2
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: scratch_store_b32 v1, v0, s0 scale_offset scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(5) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
``````````
</details>
https://github.com/llvm/llvm-project/pull/160586
More information about the llvm-commits
mailing list