[llvm-branch-commits] [llvm] DAG: Fix promote of half freeze (PR #131844)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Mar 18 09:18:15 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
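Handle ISD::FREEZE when promoting a half result in
DAGTypeLegalizer::PromoteFloatResult by routing it through the same
path as ISD::BITCAST. Also round out the AMDGPU codegen test to cover
all the generations, including the targets where f16 is illegal.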
---
Patch is 430.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131844.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp (+4-1)
- (modified) llvm/test/CodeGen/AMDGPU/freeze.ll (+8213)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 894d717bbbbd5..f51aac4021ae3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2763,7 +2763,10 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
#endif
report_fatal_error("Do not know how to promote this operator's result!");
- case ISD::BITCAST: R = PromoteFloatRes_BITCAST(N); break;
+ case ISD::BITCAST:
+ case ISD::FREEZE:
+ R = PromoteFloatRes_BITCAST(N);
+ break;
case ISD::ConstantFP: R = PromoteFloatRes_ConstantFP(N); break;
case ISD::EXTRACT_VECTOR_ELT:
R = PromoteFloatRes_EXTRACT_VECTOR_ELT(N); break;
diff --git a/llvm/test/CodeGen/AMDGPU/freeze.ll b/llvm/test/CodeGen/AMDGPU/freeze.ll
index 42d6e57585345..96725e6996e3d 100644
--- a/llvm/test/CodeGen/AMDGPU/freeze.ll
+++ b/llvm/test/CodeGen/AMDGPU/freeze.ll
@@ -1,10 +1,90 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX6,GFX6-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX6,GFX6-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX8-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
define void @freeze_v2i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v2i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v2i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v2i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v2i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v2i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v2i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -27,6 +107,80 @@ define void @freeze_v2i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v3i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v3i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dword v4, v[2:3], s[4:7], 0 addr64 offset:8
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v3i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:8
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:8
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v3i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx3 v[4:6], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v3i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[4:6], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v3i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx3 v[4:6], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx3 v[2:3], v[4:6]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v3i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx3 v[4:6], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx3 v[2:3], v[4:6], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v3i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -49,6 +203,74 @@ define void @freeze_v3i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v4i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v4i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v4i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v4i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v4i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v4i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v4i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v4i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -71,6 +293,96 @@ define void @freeze_v4i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v5i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v5i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v8, v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dword v8, v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v5i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v5i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dword v8, v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dword v8, v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v5i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v5i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dword v8, v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dword v[0:1], v8
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v5i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dword v8, v[0:1], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dword v[2:3], v8, off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v5i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -125,6 +437,96 @@ define void @freeze_v5i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v6i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v6i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v6i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v6i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v6i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v6i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/131844
More information about the llvm-branch-commits mailing list