[llvm] AMDGPU: Add more freeze codegen tests (PR #131843)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 18 09:17:48 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
---
Patch is 88.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131843.diff
1 file affected:
- (modified) llvm/test/CodeGen/AMDGPU/freeze.ll (+1963)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/freeze.ll b/llvm/test/CodeGen/AMDGPU/freeze.ll
index 22427ee344d91..42d6e57585345 100644
--- a/llvm/test/CodeGen/AMDGPU/freeze.ll
+++ b/llvm/test/CodeGen/AMDGPU/freeze.ll
@@ -1854,3 +1854,1966 @@ define void @freeze_i256(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
store i256 %freeze, ptr addrspace(1) %ptrb, align 4
ret void
}
+
+define void @freeze_i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_ushort v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_short v[2:3], v0, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_u16 v0, v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b16 v[2:3], v0, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load i16, ptr addrspace(1) %ptra
+ %freeze = freeze i16 %a
+ store i16 %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v2i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v2i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_dword v[2:3], v0, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v2i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b32 v[2:3], v0, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load <2 x i16>, ptr addrspace(1) %ptra
+ %freeze = freeze <2 x i16> %a
+ store <2 x i16> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v3i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v3i16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_short v[2:3], v1, off offset:4
+; GFX10-SDAG-NEXT: global_store_dword v[2:3], v0, off
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v3i16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_short v[2:3], v0, off
+; GFX10-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2
+; GFX10-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v3i16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: global_load_b64 v[0:1], v[0:1], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v1, off offset:4
+; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v0, off
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v3i16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x2
+; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off
+; GFX11-GISEL-NEXT: global_store_d16_hi_b16 v[2:3], v0, off offset:2
+; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v1, off offset:4
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <3 x i16>, ptr addrspace(1) %ptra
+ %freeze = freeze <3 x i16> %a
+ store <3 x i16> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v4i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v4i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v4i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load <4 x i16>, ptr addrspace(1) %ptra
+ %freeze = freeze <4 x i16> %a
+ store <4 x i16> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v8i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v8i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v8i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load <8 x i16>, ptr addrspace(1) %ptra
+ %freeze = freeze <8 x i16> %a
+ store <8 x i16> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v16i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v16i16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: s_clause 0x1
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v16i16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_clause 0x1
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v16i16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
+; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v16i16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x1
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <16 x i16>, ptr addrspace(1) %ptra
+ %freeze = freeze <16 x i16> %a
+ store <16 x i16> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_ushort v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_short v[2:3], v0, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_u16 v0, v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b16 v[2:3], v0, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load half, ptr addrspace(1) %ptra
+ %freeze = freeze half %a
+ store half %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v2f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v2f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_dword v[2:3], v0, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v2f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b32 v[2:3], v0, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load <2 x half>, ptr addrspace(1) %ptra
+ %freeze = freeze <2 x half> %a
+ store <2 x half> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v3f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v3f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_short v[2:3], v1, off offset:4
+; GFX10-SDAG-NEXT: global_store_dword v[2:3], v0, off
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v3f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_short v[2:3], v0, off
+; GFX10-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2
+; GFX10-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v3f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: global_load_b64 v[0:1], v[0:1], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v1, off offset:4
+; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v0, off
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v3f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x2
+; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off
+; GFX11-GISEL-NEXT: global_store_d16_hi_b16 v[2:3], v0, off offset:2
+; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v1, off offset:4
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <3 x half>, ptr addrspace(1) %ptra
+ %freeze = freeze <3 x half> %a
+ store <3 x half> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v4f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v4f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v4f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load <4 x half>, ptr addrspace(1) %ptra
+ %freeze = freeze <4 x half> %a
+ store <4 x half> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v8f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v8f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v8f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load <8 x half>, ptr addrspace(1) %ptra
+ %freeze = freeze <8 x half> %a
+ store <8 x half> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v16f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v16f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: s_clause 0x1
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v16f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_clause 0x1
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v16f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
+; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v16f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x1
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <16 x half>, ptr addrspace(1) %ptra
+ %freeze = freeze <16 x half> %a
+ store <16 x half> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_bf16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_ushort v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_short v[2:3], v0, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_bf16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_u16 v0, v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b16 v[2:3], v0, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load bfloat, ptr addrspace(1) %ptra
+ %freeze = freeze bfloat %a
+ store bfloat %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v2bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v2bf16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_dword v[2:3], v0, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v2bf16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b32 v[2:3], v0, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load <2 x bfloat>, ptr addrspace(1) %ptra
+ %freeze = freeze <2 x bfloat> %a
+ store <2 x bfloat> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v3bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v3bf16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_short v[2:3], v1, off offset:4
+; GFX10-SDAG-NEXT: global_store_dword v[2:3], v0, off
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v3bf16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_short v[2:3], v0, off
+; GFX10-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2
+; GFX10-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v3bf16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: global_load_b64 v[0:1], v[0:1], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v1, off offset:4
+; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v0, off
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v3bf16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x2
+; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off
+; GFX11-GISEL-NEXT: global_store_d16_hi_b16 v[2:3], v0, off offset:2
+; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v1, off offset:4
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <3 x bfloat>, ptr addrspace(1) %ptra
+ %freeze = freeze <3 x bfloat> %a
+ store <3 x bfloat> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v4bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v4bf16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v4bf16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = ...
[truncated: the rest of the patch is omitted here; see the full-version .diff link above]
``````````
</details>
https://github.com/llvm/llvm-project/pull/131843
More information about the llvm-commits mailing list