[llvm] [AMDGPU] gfx1250 codegen load tests update. NFC. (PR #155305)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 25 14:36:24 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
Author: Stanislav Mekhanoshin (rampitec)
<details>
<summary>Changes</summary>
---
Patch is 169.72 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155305.diff
5 Files Affected:
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll (+357-72)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir (+1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir (+1)
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i1.ll (+1765)
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i32.ll (+609)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
index 6bb104311a4d8..ab8d8c192187f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GFX12,GFX12-UNALIGNED %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GFX12,GFX12-NOUNALIGNED %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GFX12,GFX1250,GFX1250-UNALIGNED %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GFX12,GFX1250,GFX1250-NOUNALIGNED %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s
@@ -64,6 +66,52 @@ define <3 x i32> @v_load_constant_v3i32_align1(ptr addrspace(4) %ptr) {
; GFX12-NOUNALIGNED-NEXT: v_or3_b32 v2, v8, v9, v7
; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1250-UNALIGNED-LABEL: v_load_constant_v3i32_align1:
+; GFX1250-UNALIGNED: ; %bb.0:
+; GFX1250-UNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-UNALIGNED-NEXT: s_wait_kmcnt 0x0
+; GFX1250-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX1250-UNALIGNED-NEXT: s_wait_loadcnt 0x0
+; GFX1250-UNALIGNED-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-NOUNALIGNED-LABEL: v_load_constant_v3i32_align1:
+; GFX1250-NOUNALIGNED: ; %bb.0:
+; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NOUNALIGNED-NEXT: s_clause 0xb
+; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v2, v[0:1], off
+; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v3, v[0:1], off offset:1
+; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v4, v[0:1], off offset:2
+; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v5, v[0:1], off offset:3
+; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v6, v[0:1], off offset:4
+; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v7, v[0:1], off offset:5
+; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v8, v[0:1], off offset:6
+; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v9, v[0:1], off offset:7
+; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v10, v[0:1], off offset:8
+; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v11, v[0:1], off offset:9
+; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v12, v[0:1], off offset:11
+; GFX1250-NOUNALIGNED-NEXT: global_load_u8 v0, v[0:1], off offset:10
+; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0xa
+; GFX1250-NOUNALIGNED-NEXT: s_wait_xcnt 0x0
+; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v1, v3, 8, v2
+; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x8
+; GFX1250-NOUNALIGNED-NEXT: v_dual_lshlrev_b32 v3, 16, v4 :: v_dual_lshlrev_b32 v2, 24, v5
+; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x6
+; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v4, v7, 8, v6
+; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x4
+; GFX1250-NOUNALIGNED-NEXT: v_dual_lshlrev_b32 v6, 16, v8 :: v_dual_lshlrev_b32 v5, 24, v9
+; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x2
+; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v7, v11, 8, v10
+; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x1
+; GFX1250-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 24, v12
+; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v9, 16, v0
+; GFX1250-NOUNALIGNED-NEXT: v_or3_b32 v0, v2, v3, v1
+; GFX1250-NOUNALIGNED-NEXT: v_or3_b32 v1, v5, v6, v4
+; GFX1250-NOUNALIGNED-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX1250-NOUNALIGNED-NEXT: v_or3_b32 v2, v8, v9, v7
+; GFX1250-NOUNALIGNED-NEXT: s_set_pc_i64 s[30:31]
+;
; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align1:
; GFX9-UNALIGNED: ; %bb.0:
; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -256,6 +304,34 @@ define <3 x i32> @v_load_constant_v3i32_align2(ptr addrspace(4) %ptr) {
; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v2, v7, 16, v6
; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1250-UNALIGNED-LABEL: v_load_constant_v3i32_align2:
+; GFX1250-UNALIGNED: ; %bb.0:
+; GFX1250-UNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-UNALIGNED-NEXT: s_wait_kmcnt 0x0
+; GFX1250-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX1250-UNALIGNED-NEXT: s_wait_loadcnt 0x0
+; GFX1250-UNALIGNED-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-NOUNALIGNED-LABEL: v_load_constant_v3i32_align2:
+; GFX1250-NOUNALIGNED: ; %bb.0:
+; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NOUNALIGNED-NEXT: s_clause 0x5
+; GFX1250-NOUNALIGNED-NEXT: global_load_u16 v2, v[0:1], off
+; GFX1250-NOUNALIGNED-NEXT: global_load_u16 v3, v[0:1], off offset:2
+; GFX1250-NOUNALIGNED-NEXT: global_load_u16 v4, v[0:1], off offset:4
+; GFX1250-NOUNALIGNED-NEXT: global_load_u16 v5, v[0:1], off offset:6
+; GFX1250-NOUNALIGNED-NEXT: global_load_u16 v6, v[0:1], off offset:8
+; GFX1250-NOUNALIGNED-NEXT: global_load_u16 v7, v[0:1], off offset:10
+; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x4
+; GFX1250-NOUNALIGNED-NEXT: s_wait_xcnt 0x0
+; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v2
+; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x2
+; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v1, v5, 16, v4
+; GFX1250-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NOUNALIGNED-NEXT: v_lshl_or_b32 v2, v7, 16, v6
+; GFX1250-NOUNALIGNED-NEXT: s_set_pc_i64 s[30:31]
+;
; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align2:
; GFX9-UNALIGNED: ; %bb.0:
; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -346,16 +422,35 @@ define <3 x i32> @v_load_constant_v3i32_align2(ptr addrspace(4) %ptr) {
}
define <3 x i32> @v_load_constant_v3i32_align4(ptr addrspace(4) %ptr) {
-; GFX12-LABEL: v_load_constant_v3i32_align4:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off
-; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX12-UNALIGNED-LABEL: v_load_constant_v3i32_align4:
+; GFX12-UNALIGNED: ; %bb.0:
+; GFX12-UNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_expcnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_samplecnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_bvhcnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0
+; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX12-UNALIGNED-NEXT: s_wait_loadcnt 0x0
+; GFX12-UNALIGNED-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-NOUNALIGNED-LABEL: v_load_constant_v3i32_align4:
+; GFX12-NOUNALIGNED: ; %bb.0:
+; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_expcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_samplecnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: v_load_constant_v3i32_align4:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
;
; GFX9-LABEL: v_load_constant_v3i32_align4:
; GFX9: ; %bb.0:
@@ -392,16 +487,35 @@ define <3 x i32> @v_load_constant_v3i32_align4(ptr addrspace(4) %ptr) {
}
define i96 @v_load_constant_i96_align8(ptr addrspace(4) %ptr) {
-; GFX12-LABEL: v_load_constant_i96_align8:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off
-; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX12-UNALIGNED-LABEL: v_load_constant_i96_align8:
+; GFX12-UNALIGNED: ; %bb.0:
+; GFX12-UNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_expcnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_samplecnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_bvhcnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0
+; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX12-UNALIGNED-NEXT: s_wait_loadcnt 0x0
+; GFX12-UNALIGNED-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-NOUNALIGNED-LABEL: v_load_constant_i96_align8:
+; GFX12-NOUNALIGNED: ; %bb.0:
+; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_expcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_samplecnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: v_load_constant_i96_align8:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
;
; GFX9-LABEL: v_load_constant_i96_align8:
; GFX9: ; %bb.0:
@@ -438,16 +552,35 @@ define i96 @v_load_constant_i96_align8(ptr addrspace(4) %ptr) {
}
define <3 x i32> @v_load_constant_v3i32_align8(ptr addrspace(4) %ptr) {
-; GFX12-LABEL: v_load_constant_v3i32_align8:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off
-; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX12-UNALIGNED-LABEL: v_load_constant_v3i32_align8:
+; GFX12-UNALIGNED: ; %bb.0:
+; GFX12-UNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_expcnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_samplecnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_bvhcnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0
+; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX12-UNALIGNED-NEXT: s_wait_loadcnt 0x0
+; GFX12-UNALIGNED-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-NOUNALIGNED-LABEL: v_load_constant_v3i32_align8:
+; GFX12-NOUNALIGNED: ; %bb.0:
+; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_expcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_samplecnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: v_load_constant_v3i32_align8:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
;
; GFX9-LABEL: v_load_constant_v3i32_align8:
; GFX9: ; %bb.0:
@@ -484,16 +617,35 @@ define <3 x i32> @v_load_constant_v3i32_align8(ptr addrspace(4) %ptr) {
}
define <6 x i16> @v_load_constant_v6i16_align8(ptr addrspace(4) %ptr) {
-; GFX12-LABEL: v_load_constant_v6i16_align8:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off
-; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX12-UNALIGNED-LABEL: v_load_constant_v6i16_align8:
+; GFX12-UNALIGNED: ; %bb.0:
+; GFX12-UNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_expcnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_samplecnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_bvhcnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0
+; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX12-UNALIGNED-NEXT: s_wait_loadcnt 0x0
+; GFX12-UNALIGNED-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-NOUNALIGNED-LABEL: v_load_constant_v6i16_align8:
+; GFX12-NOUNALIGNED: ; %bb.0:
+; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_expcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_samplecnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: v_load_constant_v6i16_align8:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
;
; GFX9-LABEL: v_load_constant_v6i16_align8:
; GFX9: ; %bb.0:
@@ -539,28 +691,67 @@ define <6 x i16> @v_load_constant_v6i16_align8(ptr addrspace(4) %ptr) {
}
define <12 x i8> @v_load_constant_v12i8_align8(ptr addrspace(4) %ptr) {
-; GFX12-LABEL: v_load_constant_v12i8_align8:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off
-; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: v_lshrrev_b32_e32 v13, 8, v0
-; GFX12-NEXT: v_lshrrev_b32_e32 v12, 16, v0
-; GFX12-NEXT: v_lshrrev_b32_e32 v3, 24, v0
-; GFX12-NEXT: v_lshrrev_b32_e32 v5, 8, v1
-; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v1
-; GFX12-NEXT: v_lshrrev_b32_e32 v7, 24, v1
-; GFX12-NEXT: v_lshrrev_b32_e32 v9, 8, v2
-; GFX12-NEXT: v_lshrrev_b32_e32 v10, 16, v2
-; GFX12-NEXT: v_lshrrev_b32_e32 v11, 24, v2
-; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v13
-; GFX12-NEXT: v_mov_b32_e32 v8, v2
-; GFX12-NEXT: v_mov_b32_e32 v2, v12
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX12-UNALIGNED-LABEL: v_load_constant_v12i8_align8:
+; GFX12-UNALIGNED: ; %bb.0:
+; GFX12-UNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_expcnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_samplecnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_bvhcnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0
+; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX12-UNALIGNED-NEXT: s_wait_loadcnt 0x0
+; GFX12-UNALIGNED-NEXT: v_lshrrev_b32_e32 v13, 8, v0
+; GFX12-UNALIGNED-NEXT: v_lshrrev_b32_e32 v12, 16, v0
+; GFX12-UNALIGNED-NEXT: v_lshrrev_b32_e32 v3, 24, v0
+; GFX12-UNALIGNED-NEXT: v_lshrrev_b32_e32 v5, 8, v1
+; GFX12-UNALIGNED-NEXT: v_lshrrev_b32_e32 v6, 16, v1
+; GFX12-UNALIGNED-NEXT: v_lshrrev_b32_e32 v7, 24, v1
+; GFX12-UNALIGNED-NEXT: v_lshrrev_b32_e32 v9, 8, v2
+; GFX12-UNALIGNED-NEXT: v_lshrrev_b32_e32 v10, 16, v2
+; GFX12-UNALIGNED-NEXT: v_lshrrev_b32_e32 v11, 24, v2
+; GFX12-UNALIGNED-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v13
+; GFX12-UNALIGNED-NEXT: v_mov_b32_e32 v8, v2
+; GFX12-UNALIGNED-NEXT: v_mov_b32_e32 v2, v12
+; GFX12-UNALIGNED-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-NOUNALIGNED-LABEL: v_load_constant_v12i8_align8:
+; GFX12-NOUNALIGNED: ; %bb.0:
+; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_expcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_samplecnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: v_lshrrev_b32_e32 v13, 8, v0
+; GFX12-NOUNALIGNED-NEXT: v_lshrrev_b32_e32 v12, 16, v0
+; GFX12-NOUNALIGNED-NEXT: v_lshrrev_b32_e32 v3, 24, v0
+; GFX12-NOUNALIGNED-NEXT: v_lshrrev_b32_e32 v5, 8, v1
+; GFX12-NOUNALIGNED-NEXT: v_lshrrev_b32_e32 v6, 16, v1
+; GFX12-NOUNALIGNED-NEXT: v_lshrrev_b32_e32 v7, 24, v1
+; GFX12-NOUNALIGNED-NEXT: v_lshrrev_b32_e32 v9, 8, v2
+; GFX12-NOUNALIGNED-NEXT: v_lshrrev_b32_e32 v10, 16, v2
+; GFX12-NOUNALIGNED-NEXT: v_lshrrev_b32_e32 v11, 24, v2
+; GFX12-NOUNALIGNED-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v13
+; GFX12-NOUNALIGNED-NEXT: v_mov_b32_e32 v8, v2
+; GFX12-NOUNALIGNED-NEXT: v_mov_b32_e32 v2, v12
+; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: v_load_constant_v12i8_align8:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: v_dual_lshrrev_b32 v13, 8, v0 :: v_dual_lshrrev_b32 v12, 16, v0
+; GFX1250-NEXT: v_dual_lshrrev_b32 v3, 24, v0 :: v_dual_lshrrev_b32 v5, 8, v1
+; GFX1250-NEXT: v_dual_lshrrev_b32 v6, 16, v1 :: v_dual_lshrrev_b32 v7, 24, v1
+; GFX1250-NEXT: v_dual_lshrrev_b32 v9, 8, v2 :: v_dual_lshrrev_b32 v10, 16, v2
+; GFX1250-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_lshrrev_b32 v11, 24, v2
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v8, v2 :: v_dual_mov_b32 v1, v13
+; GFX1250-NEXT: v_mov_b32_e32 v2, v12
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
;
; GFX9-LABEL: v_load_constant_v12i8_align8:
; GFX9: ; %bb.0:
@@ -632,16 +823,35 @@ define <12 x i8> @v_load_constant_v12i8_align8(ptr addrspace(4) %ptr) {
}
define <3 x i32> @v_load_constant_v3i32_align16(ptr addrspace(4) %ptr) {
-; GFX12-LABEL: v_load_constant_v3i32_align16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT: s_wait_expcnt 0x0
-; GFX12-NEXT: s_wait_samplecnt 0x0
-; GFX12-NEXT: s_wait_bvhcnt 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off
-; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX12-UNALIGNED-LABEL: v_load_constant_v3i32_align16:
+; GFX12-UNALIGNED: ; %bb.0:
+; GFX12-UNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_expcnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_samplecnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_bvhcnt 0x0
+; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0
+; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX12-UNALIGNED-NEXT: s_wait_loadcnt 0x0
+; GFX12-UNALIGNED-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-NOUNALIGNED-LABEL: v_load_constant_v3i32_align16:
+; GFX12-NOUNALIGNED: ; %bb.0:
+; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_expcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_samplecnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0
+; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: v_load_constant_v3i32_align16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b96 v[0:2], v[0:1], off
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
;
; GFX9-LABEL: v_load_constant_v3i32_align16:
; GFX9: ; %bb.0:
@...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/155305
More information about the llvm-commits mailing list