[llvm] 5b6b4fd - DAG: Fix promote of half freeze (#131844)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 19 04:30:38 PDT 2025
Author: Matt Arsenault
Date: 2025-03-19T18:30:34+07:00
New Revision: 5b6b4fdb4b07e1d0d9a647f40a30d47f26178e09
URL: https://github.com/llvm/llvm-project/commit/5b6b4fdb4b07e1d0d9a647f40a30d47f26178e09
DIFF: https://github.com/llvm/llvm-project/commit/5b6b4fdb4b07e1d0d9a647f40a30d47f26178e09.diff
LOG: DAG: Fix promote of half freeze (#131844)
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
llvm/test/CodeGen/AMDGPU/freeze.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 894d717bbbbd5..01751dfe9eb62 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2763,7 +2763,12 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
#endif
report_fatal_error("Do not know how to promote this operator's result!");
- case ISD::BITCAST: R = PromoteFloatRes_BITCAST(N); break;
+ case ISD::BITCAST:
+ R = PromoteFloatRes_BITCAST(N);
+ break;
+ case ISD::FREEZE:
+ R = PromoteFloatRes_FREEZE(N);
+ break;
case ISD::ConstantFP: R = PromoteFloatRes_ConstantFP(N); break;
case ISD::EXTRACT_VECTOR_ELT:
R = PromoteFloatRes_EXTRACT_VECTOR_ELT(N); break;
@@ -2876,6 +2881,18 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_BITCAST(SDNode *N) {
return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, Cast);
}
+SDValue DAGTypeLegalizer::PromoteFloatRes_FREEZE(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ // Reinterpret the operand as an integer of the same bit width and freeze
+ // that integer; the frozen value then feeds the VT -> NVT promotion node.
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(),
+ N->getOperand(0).getValueType().getSizeInBits());
+ SDValue Cast = DAG.getBitcast(IVT, N->getOperand(0));
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT,
+ DAG.getFreeze(Cast));
+}
+
SDValue DAGTypeLegalizer::PromoteFloatRes_ConstantFP(SDNode *N) {
ConstantFPSDNode *CFPNode = cast<ConstantFPSDNode>(N);
EVT VT = N->getValueType(0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 50247cebb91b1..720393158aa5e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -756,6 +756,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void PromoteFloatResult(SDNode *N, unsigned ResNo);
SDValue PromoteFloatRes_BITCAST(SDNode *N);
+ SDValue PromoteFloatRes_FREEZE(SDNode *N);
SDValue PromoteFloatRes_BinOp(SDNode *N);
SDValue PromoteFloatRes_UnaryWithTwoFPResults(SDNode *N);
SDValue PromoteFloatRes_ConstantFP(SDNode *N);
diff --git a/llvm/test/CodeGen/AMDGPU/freeze.ll b/llvm/test/CodeGen/AMDGPU/freeze.ll
index 6f665d76b517b..b1732b905e4c1 100644
--- a/llvm/test/CodeGen/AMDGPU/freeze.ll
+++ b/llvm/test/CodeGen/AMDGPU/freeze.ll
@@ -1,10 +1,91 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX6,GFX6-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX6,GFX6-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
define void @freeze_v2i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v2i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v2i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v2i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v2i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v2i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v2i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -27,6 +108,80 @@ define void @freeze_v2i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v3i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v3i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dword v4, v[2:3], s[4:7], 0 addr64 offset:8
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v3i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:8
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:8
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v3i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx3 v[4:6], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v3i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[4:6], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v3i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx3 v[4:6], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx3 v[2:3], v[4:6]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v3i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx3 v[4:6], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx3 v[2:3], v[4:6], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v3i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -49,6 +204,74 @@ define void @freeze_v3i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v4i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v4i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v4i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v4i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v4i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v4i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v4i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v4i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -71,6 +294,96 @@ define void @freeze_v4i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v5i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v5i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v8, v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dword v8, v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v5i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v5i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dword v8, v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dword v8, v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v5i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v5i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dword v8, v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dword v[0:1], v8
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v5i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dword v8, v[0:1], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dword v[2:3], v8, off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v5i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -125,6 +438,96 @@ define void @freeze_v5i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v6i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v6i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v6i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v6i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v6i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v6i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v6i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v6i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -179,6 +582,102 @@ define void @freeze_v6i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v7i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v7i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v10, v[0:1], s[4:7], 0 addr64 offset:24
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX6-SDAG-NEXT: buffer_store_dword v10, v[2:3], s[4:7], 0 addr64 offset:24
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v7i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:24
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:24
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v7i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx3 v[8:10], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx3 v[8:10], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v7i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[8:10], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[8:10], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v7i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx3 v[8:10], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx3 v[0:1], v[8:10]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v7i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx3 v[8:10], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx3 v[2:3], v[8:10], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v7i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -233,6 +732,96 @@ define void @freeze_v7i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v8i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v8i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v8i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v8i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v8i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v8i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v8i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v8i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -287,6 +876,118 @@ define void @freeze_v8i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v9i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v9i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v12, v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX6-SDAG-NEXT: buffer_store_dword v12, v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v9i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v9i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dword v12, v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX7-SDAG-NEXT: buffer_store_dword v12, v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v9i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v9i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dword v14, v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX8-GISEL-NEXT: flat_store_dword v[12:13], v14
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v9i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dword v12, v[0:1], off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX9-GISEL-NEXT: global_store_dword v[2:3], v12, off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v9i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -353,6 +1054,118 @@ define void @freeze_v9i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v10i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v10i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v10i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v10i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v10i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v10i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v14, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX8-GISEL-NEXT: flat_store_dwordx2 v[14:15], v[0:1]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v10i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-NEXT: global_load_dwordx2 v[12:13], v[0:1], off offset:32
+; GFX9-NEXT: s_waitcnt vmcnt(2)
+; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-NEXT: s_waitcnt vmcnt(2)
+; GFX9-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(2)
+; GFX9-NEXT: global_store_dwordx2 v[2:3], v[12:13], off offset:32
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v10i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -389,6 +1202,124 @@ define void @freeze_v10i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v11i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v11i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v14, v[0:1], s[4:7], 0 addr64 offset:40
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[12:13], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dword v14, v[2:3], s[4:7], 0 addr64 offset:40
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[12:13], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v11i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[12:13], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:40
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[12:13], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:40
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v11i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx3 v[12:14], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX7-SDAG-NEXT: buffer_store_dwordx3 v[12:14], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v11i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[12:14], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[12:14], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v11i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx3 v[12:14], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v15, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX8-GISEL-NEXT: flat_store_dwordx3 v[15:16], v[12:14]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v11i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-NEXT: global_load_dwordx3 v[12:14], v[0:1], off offset:32
+; GFX9-NEXT: s_waitcnt vmcnt(2)
+; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-NEXT: s_waitcnt vmcnt(2)
+; GFX9-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(2)
+; GFX9-NEXT: global_store_dwordx3 v[2:3], v[12:14], off offset:32
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v11i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -425,6 +1356,118 @@ define void @freeze_v11i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v12i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v12i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v12i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v12i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v12i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v12i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[16:17], v[12:15]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v12i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX9-NEXT: s_waitcnt vmcnt(2)
+; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-NEXT: s_waitcnt vmcnt(2)
+; GFX9-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(2)
+; GFX9-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v12i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -460,6 +1503,140 @@ define void @freeze_v12i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
ret void
}
define void @freeze_v13i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v13i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v16, v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dword v16, v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v13i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v13i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dword v16, v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dword v16, v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v13i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v13i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dword v18, v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[16:17], v[12:15]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dword v[2:3], v18
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v13i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX9-GISEL-NEXT: global_load_dword v16, v[0:1], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dword v[2:3], v16, off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v13i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -538,6 +1715,140 @@ define void @freeze_v13i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v14i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v14i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v14i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v14i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v14i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v14i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[16:17], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[18:19], v[12:15]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v14i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX9-GISEL-NEXT: global_load_dwordx2 v[16:17], v[0:1], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[16:17], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v14i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -616,6 +1927,150 @@ define void @freeze_v14i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v15i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v15i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v18, v[0:1], s[4:7], 0 addr64 offset:56
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dword v18, v[2:3], s[4:7], 0 addr64 offset:56
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v15i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:56
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:56
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v15i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dword v18, v[0:1], s[4:7], 0 addr64 offset:56
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dword v18, v[2:3], s[4:7], 0 addr64 offset:56
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v15i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[16:18], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[16:18], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v15i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx3 v[16:18], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_nop 0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx3 v[2:3], v[16:18]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v15i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX9-GISEL-NEXT: global_load_dwordx3 v[16:18], v[0:1], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx3 v[2:3], v[16:18], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v15i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -694,6 +2149,141 @@ define void @freeze_v15i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v16i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v16i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v16i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v16i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v16i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v16i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_nop 0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[16:19]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v16i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v16i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -772,6 +2362,160 @@ define void @freeze_v16i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v17i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v17i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v20, v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dword v20, v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v17i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v17i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dword v20, v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dword v20, v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v17i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v17i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 32, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 64, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[0:1]
+; GFX8-GISEL-NEXT: flat_load_dword v20, v[18:19]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[12:15]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 64, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19]
+; GFX8-GISEL-NEXT: flat_store_dword v[2:3], v20
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v17i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX9-GISEL-NEXT: global_load_dword v20, v[0:1], off offset:64
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dword v[2:3], v20, off offset:64
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v17i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -862,6 +2606,161 @@ define void @freeze_v17i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v18i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v18i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v18i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v18i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v18i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v18i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 32, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 64, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[0:1]
+; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[18:19]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[20:21], v[4:7]
+; GFX8-GISEL-NEXT: s_nop 0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 48, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[12:15]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 64, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[16:19]
+; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v18i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX9-GISEL-NEXT: global_load_dwordx2 v[20:21], v[0:1], off offset:64
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[20:21], off offset:64
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v18i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -952,6 +2851,169 @@ define void @freeze_v18i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v19i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v19i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v22, v[0:1], s[4:7], 0 addr64 offset:72
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX6-SDAG-NEXT: buffer_store_dword v22, v[2:3], s[4:7], 0 addr64 offset:72
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v19i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: buffer_load_dword v22, v[0:1], s[4:7], 0 addr64 offset:72
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: buffer_store_dword v22, v[2:3], s[4:7], 0 addr64 offset:72
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v19i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dword v22, v[0:1], s[4:7], 0 addr64 offset:72
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX7-SDAG-NEXT: buffer_store_dword v22, v[2:3], s[4:7], 0 addr64 offset:72
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v19i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[20:22], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[20:22], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v19i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 32, v0
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[0:1]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
+; GFX8-GISEL-NEXT: flat_load_dwordx3 v[20:22], v[20:21]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 64, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[12:15]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx3 v[6:7], v[20:22]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v19i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX9-GISEL-NEXT: global_load_dwordx3 v[20:22], v[0:1], off offset:64
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dwordx3 v[2:3], v[20:22], off offset:64
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v19i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1042,6 +3104,163 @@ define void @freeze_v19i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v20i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v20i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v20i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v20i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v20i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v20i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 64, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_nop 0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 48, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 64, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[16:19]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[20:23]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v20i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v20i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1132,6 +3351,185 @@ define void @freeze_v20i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v21i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v21i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: buffer_load_dword v24, v[0:1], s[4:7], 0 addr64 offset:80
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX6-SDAG-NEXT: buffer_store_dword v24, v[2:3], s[4:7], 0 addr64 offset:80
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v21i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:80
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:80
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v21i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: buffer_load_dword v24, v[0:1], s[4:7], 0 addr64 offset:80
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX7-SDAG-NEXT: buffer_store_dword v24, v[2:3], s[4:7], 0 addr64 offset:80
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v21i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:80
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:80
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v21i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, 0x50
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, v0, v6
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
+; GFX8-GISEL-NEXT: flat_load_dword v26, v[8:9]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 64, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[24:25], v[4:7]
+; GFX8-GISEL-NEXT: s_nop 0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 0x50, v2
+; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23]
+; GFX8-GISEL-NEXT: flat_store_dword v[6:7], v26
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v21i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
+; GFX9-GISEL-NEXT: global_load_dword v24, v[0:1], off offset:80
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX9-GISEL-NEXT: global_store_dword v[2:3], v24, off offset:80
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v21i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1234,6 +3632,185 @@ define void @freeze_v21i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v22i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v22i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[24:25], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[24:25], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v22i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v22i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[24:25], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[24:25], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v22i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v22i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, 0x50
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, v0, v6
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
+; GFX8-GISEL-NEXT: flat_load_dwordx2 v[24:25], v[8:9]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 64, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v26, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v27, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[26:27], v[4:7]
+; GFX8-GISEL-NEXT: s_nop 0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 0x50, v2
+; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23]
+; GFX8-GISEL-NEXT: flat_store_dwordx2 v[6:7], v[24:25]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v22i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
+; GFX9-GISEL-NEXT: global_load_dwordx2 v[24:25], v[0:1], off offset:80
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[24:25], off offset:80
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v22i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1336,6 +3913,235 @@ define void @freeze_v22i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v30i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v30i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v30i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v30i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v30i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v30i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v34, 0x50
+; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v34
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60
+; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x70
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[28:31], v[28:29]
+; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v32, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v33, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[32:33], v[4:7]
+; GFX8-GISEL-NEXT: s_nop 0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 48, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v32, vcc, v2, v34
+; GFX8-GISEL-NEXT: v_add_u32_e64 v34, s[4:5], 64, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e64 v35, s[4:5], 0, v3, s[4:5]
+; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11]
+; GFX8-GISEL-NEXT: v_addc_u32_e64 v33, s[4:5], 0, v3, s[4:5]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 0x60, v2
+; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[16:19]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[34:35], v[20:23]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[32:33], v[24:27]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[28:31]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v30i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96
+; GFX9-GISEL-NEXT: s_nop 0
+; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off offset:112
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:112
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v30i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1462,6 +4268,239 @@ define void @freeze_v30i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v31i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v31i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX6-SDAG-NEXT: buffer_load_dword v34, v[0:1], s[4:7], 0 addr64 offset:120
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8)
+; GFX6-SDAG-NEXT: buffer_store_dword v34, v[2:3], s[4:7], 0 addr64 offset:120
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v31i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dword v34, v[0:1], s[4:7], 0 addr64 offset:120
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(8)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(8)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(6)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX6-GISEL-NEXT: buffer_store_dword v34, v[2:3], s[4:7], 0 addr64 offset:120
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v31i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX7-SDAG-NEXT: buffer_load_dword v34, v[0:1], s[4:7], 0 addr64 offset:120
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8)
+; GFX7-SDAG-NEXT: buffer_store_dword v34, v[2:3], s[4:7], 0 addr64 offset:120
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v31i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[32:34], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[32:34], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v31i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v35, 0x50
+; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v35
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60
+; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x70
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[28:31], v[28:29]
+; GFX8-GISEL-NEXT: flat_load_dwordx3 v[32:34], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, v2, v35
+; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5]
+; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 0x60, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, v3, s[4:5]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[24:27]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[28:31]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx3 v[2:3], v[32:34]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v31i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96
+; GFX9-GISEL-NEXT: global_load_dwordx3 v[32:34], v[0:1], off offset:112
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx3 v[2:3], v[32:34], off offset:112
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v31i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1588,6 +4627,233 @@ define void @freeze_v31i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v32i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v32i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v32i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v32i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v32i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v32i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v38, 0x50
+; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v38
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60
+; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x70
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[28:31], v[28:29]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[32:35], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v36, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v37, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[36:37], v[4:7]
+; GFX8-GISEL-NEXT: s_nop 0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, v2, v38
+; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5]
+; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 0x60, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, v3, s[4:5]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[24:27]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[28:31]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[32:35]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v32i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v32i32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1714,6 +4980,74 @@ define void @freeze_v32i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_i32:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_i32:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dword v0, v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dword v[2:3], v0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dword v[2:3], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1736,6 +5070,74 @@ define void @freeze_i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_i64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_i64:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_i64:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_i64:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_i64:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_i64:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1758,6 +5160,74 @@ define void @freeze_i64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_float(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_float:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_float:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_float:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_float:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_float:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dword v0, v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dword v[2:3], v0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_float:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dword v[2:3], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_float:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1780,6 +5250,74 @@ define void @freeze_float(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_i128(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_i128:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_i128:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_i128:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_i128:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_i128:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_i128:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_i128:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1802,6 +5340,96 @@ define void @freeze_i128(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_i256(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_i256:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_i256:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_i256:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_i256:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_i256:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_i256:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_i256:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1856,6 +5484,74 @@ define void @freeze_i256(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_i16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_i16:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_i16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_i16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_ushort v0, v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_short v[2:3], v0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_ushort v0, v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_short v[2:3], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1878,6 +5574,74 @@ define void @freeze_i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v2i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v2i16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v2i16:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v2i16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v2i16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v2i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dword v0, v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dword v[2:3], v0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v2i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dword v[2:3], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1900,6 +5664,99 @@ define void @freeze_v2i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v3i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v3i16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v4, 16, v4
+; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX6-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
+; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v3i16:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2
+; GFX6-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v3i16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v4, 16, v4
+; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX7-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
+; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v3i16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2
+; GFX7-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v3i16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 2, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 4, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v0
+; GFX8-GISEL-NEXT: flat_store_short v[2:3], v0
+; GFX8-GISEL-NEXT: flat_store_short v[4:5], v8
+; GFX8-GISEL-NEXT: flat_store_short v[6:7], v1
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v3i16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off
+; GFX9-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2
+; GFX9-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v3i16:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1946,6 +5803,74 @@ define void @freeze_v3i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v4i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v4i16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v4i16:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v4i16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v4i16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v4i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v4i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v4i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1968,6 +5893,74 @@ define void @freeze_v4i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v8i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v8i16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v8i16:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v8i16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v8i16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v8i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v8i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v8i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1990,6 +5983,96 @@ define void @freeze_v8i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v16i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v16i16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v16i16:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v16i16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v16i16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v16i16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v16i16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v16i16:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2044,6 +6127,74 @@ define void @freeze_v16i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_f16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_f16:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_ushort v0, v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_short v[2:3], v0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_ushort v0, v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_short v[2:3], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2066,6 +6217,74 @@ define void @freeze_f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v2f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v2f16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v2f16:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v2f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v2f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v2f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dword v0, v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dword v[2:3], v0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v2f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dword v[2:3], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2088,6 +6307,99 @@ define void @freeze_v2f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v3f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v3f16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0
+; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX6-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
+; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v3f16:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2
+; GFX6-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v3f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0
+; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX7-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
+; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v3f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2
+; GFX7-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v3f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 2, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 4, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v0
+; GFX8-GISEL-NEXT: flat_store_short v[2:3], v0
+; GFX8-GISEL-NEXT: flat_store_short v[4:5], v8
+; GFX8-GISEL-NEXT: flat_store_short v[6:7], v1
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v3f16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off
+; GFX9-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2
+; GFX9-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v3f16:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2134,6 +6446,74 @@ define void @freeze_v3f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v4f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v4f16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v4f16:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v4f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v4f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v4f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v4f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v4f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2156,6 +6536,74 @@ define void @freeze_v4f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v8f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v8f16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v8f16:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v8f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v8f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v8f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v8f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v8f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2178,6 +6626,96 @@ define void @freeze_v8f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v16f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v16f16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v16f16:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v16f16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v16f16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v16f16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v16f16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v16f16:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2232,6 +6770,80 @@ define void @freeze_v16f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_bf16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX6-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_bf16:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_bf16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_bf16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_bf16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_ushort v0, v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_short v[2:3], v0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_bf16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_ushort v0, v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_short v[2:3], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2254,6 +6866,74 @@ define void @freeze_bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v2bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v2bf16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v2bf16:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v2bf16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v2bf16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v2bf16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dword v0, v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dword v[2:3], v0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v2bf16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dword v[2:3], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v2bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2276,6 +6956,109 @@ define void @freeze_v2bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v3bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v3bf16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
+; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX6-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4
+; GFX6-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX6-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX6-SDAG-NEXT: v_alignbit_b32 v0, v4, v0, 16
+; GFX6-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
+; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v3bf16:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2
+; GFX6-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v3bf16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
+; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1
+; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX7-SDAG-NEXT: v_alignbit_b32 v0, v4, v0, 16
+; GFX7-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
+; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v3bf16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2
+; GFX7-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v3bf16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 2, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 4, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v0
+; GFX8-GISEL-NEXT: flat_store_short v[2:3], v0
+; GFX8-GISEL-NEXT: flat_store_short v[4:5], v8
+; GFX8-GISEL-NEXT: flat_store_short v[6:7], v1
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v3bf16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off
+; GFX9-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2
+; GFX9-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v3bf16:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2322,6 +7105,74 @@ define void @freeze_v3bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v4bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v4bf16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v4bf16:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v4bf16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v4bf16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v4bf16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v4bf16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v4bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2344,6 +7195,74 @@ define void @freeze_v4bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v8bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v8bf16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v8bf16:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v8bf16:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v8bf16:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v8bf16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v8bf16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v8bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2366,6 +7285,74 @@ define void @freeze_v8bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_f64:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_f64:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_f64:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_f64:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_f64:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_f64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2388,6 +7375,74 @@ define void @freeze_f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v2f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v2f64:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v2f64:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v2f64:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v2f64:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v2f64:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v2f64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v2f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2410,6 +7465,96 @@ define void @freeze_v2f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v3f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v3f64:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v3f64:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v3f64:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v3f64:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v3f64:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[8:9]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v3f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v3f64:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2464,6 +7609,96 @@ define void @freeze_v3f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v4f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v4f64:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v4f64:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v4f64:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v4f64:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v4f64:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v4f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v4f64:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2518,6 +7753,141 @@ define void @freeze_v4f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v8f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v8f64:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v8f64:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v8f64:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v8f64:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v8f64:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_nop 0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[16:19]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v8f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v8f64:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2596,6 +7966,74 @@ define void @freeze_v8f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_p0:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_p0:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_p0:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_p0:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_p0:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_p0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_p0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2618,6 +8056,74 @@ define void @freeze_p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v2p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v2p0:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v2p0:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v2p0:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v2p0:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v2p0:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v2p0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v2p0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2640,6 +8146,105 @@ define void @freeze_v2p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v3p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v3p0:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v3p0:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v0, v4
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v1, v5
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v3p0:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v3p0:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v4
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v5
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v3p0:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v2
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v8
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v9
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v3p0:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[6:9], v[0:1], off
+; GFX9-GISEL-NEXT: ; kill: killed $vgpr0 killed $vgpr1
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, v4
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, v5
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[6:9], off
+; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v3p0:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2699,6 +8304,96 @@ define void @freeze_v3p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v4p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v4p0:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v4p0:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v4p0:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v4p0:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v4p0:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v4p0:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v4p0:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2753,6 +8448,141 @@ define void @freeze_v4p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v8p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v8p0:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v8p0:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v8p0:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v8p0:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v8p0:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_nop 0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[16:19]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v8p0:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v8p0:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2831,6 +8661,233 @@ define void @freeze_v8p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v16p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v16p0:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v16p0:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v16p0:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v16p0:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v16p0:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v38, 0x50
+; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v38
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60
+; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x70
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[28:31], v[28:29]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[32:35], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v36, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v37, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[36:37], v[4:7]
+; GFX8-GISEL-NEXT: s_nop 0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, v2, v38
+; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5]
+; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 0x60, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, v3, s[4:5]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[24:27]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[28:31]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[32:35]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v16p0:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v16p0:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2957,6 +9014,74 @@ define void @freeze_v16p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_p1:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_p1:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_p1:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_p1:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_p1:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_p1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_p1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2979,6 +9104,74 @@ define void @freeze_p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v2p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v2p1:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v2p1:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v2p1:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v2p1:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v2p1:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v2p1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v2p1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3001,6 +9194,105 @@ define void @freeze_v2p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v3p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v3p1:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v3p1:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v0, v4
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v1, v5
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v3p1:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v3p1:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v4
+; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v5
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v3p1:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v2
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v8
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v9
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v3p1:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[6:9], v[0:1], off
+; GFX9-GISEL-NEXT: ; kill: killed $vgpr0 killed $vgpr1
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, v4
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, v5
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[6:9], off
+; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v3p1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3060,6 +9352,96 @@ define void @freeze_v3p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v4p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v4p1:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v4p1:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v4p1:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v4p1:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v4p1:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v4p1:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v4p1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3114,6 +9496,141 @@ define void @freeze_v4p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v8p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v8p1:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v8p1:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v8p1:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v8p1:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v8p1:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_nop 0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[16:19]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v8p1:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v8p1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3192,6 +9709,233 @@ define void @freeze_v8p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v16p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v16p1:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v16p1:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v16p1:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v16p1:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v16p1:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v38, 0x50
+; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v38
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60
+; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x70
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[28:31], v[28:29]
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[32:35], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v36, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v37, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[36:37], v[4:7]
+; GFX8-GISEL-NEXT: s_nop 0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, v2, v38
+; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5]
+; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 0x60, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, v3, s[4:5]
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[24:27]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[28:31]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[32:35]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v16p1:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v16p1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3318,6 +10062,45 @@ define void @freeze_v16p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) {
+; GFX6-LABEL: freeze_p3:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: s_mov_b32 m0, -1
+; GFX6-NEXT: ds_read_b32 v0, v0
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: ds_write_b32 v1, v0
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-LABEL: freeze_p3:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: ds_read_b32 v0, v0
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ds_write_b32 v1, v0
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_p3:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: ds_read_b32 v0, v0
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ds_write_b32 v1, v0
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_p3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_b32 v0, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: ds_write_b32 v1, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_p3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3342,6 +10125,45 @@ define void @freeze_p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) {
}
define void @freeze_v2p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) {
+; GFX6-LABEL: freeze_v2p3:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: s_mov_b32 m0, -1
+; GFX6-NEXT: ds_read_b64 v[2:3], v0
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: ds_write_b64 v1, v[2:3]
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-LABEL: freeze_v2p3:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: ds_read_b64 v[2:3], v0
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ds_write_b64 v1, v[2:3]
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v2p3:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: ds_read_b64 v[2:3], v0
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ds_write_b64 v1, v[2:3]
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v2p3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_b64 v[2:3], v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: ds_write_b64 v1, v[2:3]
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v2p3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3366,6 +10188,65 @@ define void @freeze_v2p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) {
}
define void @freeze_v3p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v3p3:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 8, v0
+; GFX6-SDAG-NEXT: s_mov_b32 m0, -1
+; GFX6-SDAG-NEXT: ds_read_b32 v4, v2
+; GFX6-SDAG-NEXT: ds_read_b64 v[2:3], v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v1
+; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(1)
+; GFX6-SDAG-NEXT: ds_write_b32 v0, v4
+; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(1)
+; GFX6-SDAG-NEXT: ds_write_b64 v1, v[2:3]
+; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v3p3:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 m0, -1
+; GFX6-GISEL-NEXT: ds_read_b64 v[2:3], v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 8, v0
+; GFX6-GISEL-NEXT: ds_read_b32 v0, v0
+; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(1)
+; GFX6-GISEL-NEXT: ds_write_b64 v1, v[2:3]
+; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, 8, v1
+; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(1)
+; GFX6-GISEL-NEXT: ds_write_b32 v1, v0
+; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-LABEL: freeze_v3p3:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: ds_read_b96 v[2:4], v0
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ds_write_b96 v1, v[2:4]
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v3p3:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: ds_read_b96 v[2:4], v0
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ds_write_b96 v1, v[2:4]
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v3p3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_b96 v[2:4], v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: ds_write_b96 v1, v[2:4]
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v3p3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3390,6 +10271,50 @@ define void @freeze_v3p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) {
}
define void @freeze_v4p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) {
+; GFX6-LABEL: freeze_v4p3:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: s_mov_b32 m0, -1
+; GFX6-NEXT: ds_read_b64 v[2:3], v0
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, 8, v0
+; GFX6-NEXT: ds_read_b64 v[4:5], v0
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, 8, v1
+; GFX6-NEXT: s_waitcnt lgkmcnt(1)
+; GFX6-NEXT: ds_write_b64 v1, v[2:3]
+; GFX6-NEXT: s_waitcnt lgkmcnt(1)
+; GFX6-NEXT: ds_write_b64 v0, v[4:5]
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-LABEL: freeze_v4p3:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: ds_read_b128 v[2:5], v0
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: ds_write_b128 v1, v[2:5]
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_v4p3:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: ds_read_b128 v[2:5], v0
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: ds_write_b128 v1, v[2:5]
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v4p3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_b128 v[2:5], v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: ds_write_b128 v1, v[2:5]
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v4p3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3414,6 +10339,105 @@ define void @freeze_v4p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) {
}
define void @freeze_v8p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v8p3:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 24, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 16, v0
+; GFX6-SDAG-NEXT: s_mov_b32 m0, -1
+; GFX6-SDAG-NEXT: ds_read_b64 v[2:3], v2
+; GFX6-SDAG-NEXT: ds_read_b64 v[4:5], v4
+; GFX6-SDAG-NEXT: ds_read_b64 v[6:7], v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v0
+; GFX6-SDAG-NEXT: ds_read_b64 v[8:9], v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 16, v1
+; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(2)
+; GFX6-SDAG-NEXT: ds_write_b64 v0, v[4:5]
+; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 24, v1
+; GFX6-SDAG-NEXT: ds_write_b64 v0, v[2:3]
+; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v1
+; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(3)
+; GFX6-SDAG-NEXT: ds_write_b64 v1, v[6:7]
+; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(3)
+; GFX6-SDAG-NEXT: ds_write_b64 v0, v[8:9]
+; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v8p3:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 8, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 16, v0
+; GFX6-GISEL-NEXT: s_mov_b32 m0, -1
+; GFX6-GISEL-NEXT: ds_read_b64 v[2:3], v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 24, v0
+; GFX6-GISEL-NEXT: ds_read_b64 v[4:5], v4
+; GFX6-GISEL-NEXT: ds_read_b64 v[6:7], v6
+; GFX6-GISEL-NEXT: ds_read_b64 v[8:9], v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 8, v1
+; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(2)
+; GFX6-GISEL-NEXT: ds_write_b64 v0, v[4:5]
+; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 16, v1
+; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(2)
+; GFX6-GISEL-NEXT: ds_write_b64 v0, v[6:7]
+; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 24, v1
+; GFX6-GISEL-NEXT: ds_write_b64 v1, v[2:3]
+; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(3)
+; GFX6-GISEL-NEXT: ds_write_b64 v0, v[8:9]
+; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v8p3:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 m0, -1
+; GFX7-SDAG-NEXT: ds_read_b128 v[2:5], v0 offset:16
+; GFX7-SDAG-NEXT: ds_read_b128 v[6:9], v0
+; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(1)
+; GFX7-SDAG-NEXT: ds_write_b128 v1, v[2:5] offset:16
+; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(1)
+; GFX7-SDAG-NEXT: ds_write_b128 v1, v[6:9]
+; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v8p3:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 m0, -1
+; GFX7-GISEL-NEXT: ds_read_b128 v[2:5], v0
+; GFX7-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16
+; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(1)
+; GFX7-GISEL-NEXT: ds_write_b128 v1, v[2:5]
+; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(1)
+; GFX7-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16
+; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v8p3:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: s_mov_b32 m0, -1
+; GFX8-GISEL-NEXT: ds_read_b128 v[2:5], v0
+; GFX8-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16
+; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(1)
+; GFX8-GISEL-NEXT: ds_write_b128 v1, v[2:5]
+; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(1)
+; GFX8-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16
+; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v8p3:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: ds_read_b128 v[2:5], v0
+; GFX9-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(1)
+; GFX9-GISEL-NEXT: ds_write_b128 v1, v[2:5]
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(1)
+; GFX9-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v8p3:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3468,6 +10492,164 @@ define void @freeze_v8p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) {
}
define void @freeze_v16p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v16p3:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 8, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 24, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 16, v0
+; GFX6-SDAG-NEXT: s_mov_b32 m0, -1
+; GFX6-SDAG-NEXT: v_add_i32_e32 v12, vcc, 40, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v14, vcc, 32, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v16, vcc, 56, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v10, vcc, 48, v0
+; GFX6-SDAG-NEXT: ds_read_b64 v[2:3], v2
+; GFX6-SDAG-NEXT: ds_read_b64 v[4:5], v4
+; GFX6-SDAG-NEXT: ds_read_b64 v[6:7], v6
+; GFX6-SDAG-NEXT: ds_read_b64 v[8:9], v0
+; GFX6-SDAG-NEXT: ds_read_b64 v[10:11], v10
+; GFX6-SDAG-NEXT: ds_read_b64 v[12:13], v12
+; GFX6-SDAG-NEXT: ds_read_b64 v[14:15], v14
+; GFX6-SDAG-NEXT: ds_read_b64 v[16:17], v16
+; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 48, v1
+; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(3)
+; GFX6-SDAG-NEXT: ds_write_b64 v0, v[10:11]
+; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 56, v1
+; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(1)
+; GFX6-SDAG-NEXT: ds_write_b64 v0, v[16:17]
+; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 32, v1
+; GFX6-SDAG-NEXT: ds_write_b64 v0, v[14:15]
+; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 40, v1
+; GFX6-SDAG-NEXT: ds_write_b64 v0, v[12:13]
+; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 16, v1
+; GFX6-SDAG-NEXT: ds_write_b64 v0, v[4:5]
+; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 24, v1
+; GFX6-SDAG-NEXT: ds_write_b64 v0, v[2:3]
+; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v1
+; GFX6-SDAG-NEXT: ds_write_b64 v1, v[8:9]
+; GFX6-SDAG-NEXT: ds_write_b64 v0, v[6:7]
+; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v16p3:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 8, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 16, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v8, vcc, 24, v0
+; GFX6-GISEL-NEXT: s_mov_b32 m0, -1
+; GFX6-GISEL-NEXT: ds_read_b64 v[2:3], v0
+; GFX6-GISEL-NEXT: ds_read_b64 v[4:5], v4
+; GFX6-GISEL-NEXT: ds_read_b64 v[6:7], v6
+; GFX6-GISEL-NEXT: ds_read_b64 v[8:9], v8
+; GFX6-GISEL-NEXT: v_add_i32_e32 v10, vcc, 32, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v12, vcc, 40, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v14, vcc, 48, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 56, v0
+; GFX6-GISEL-NEXT: ds_read_b64 v[10:11], v10
+; GFX6-GISEL-NEXT: ds_read_b64 v[12:13], v12
+; GFX6-GISEL-NEXT: ds_read_b64 v[14:15], v14
+; GFX6-GISEL-NEXT: ds_read_b64 v[16:17], v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 8, v1
+; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6)
+; GFX6-GISEL-NEXT: ds_write_b64 v0, v[4:5]
+; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 16, v1
+; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6)
+; GFX6-GISEL-NEXT: ds_write_b64 v0, v[6:7]
+; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 24, v1
+; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6)
+; GFX6-GISEL-NEXT: ds_write_b64 v0, v[8:9]
+; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 32, v1
+; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6)
+; GFX6-GISEL-NEXT: ds_write_b64 v0, v[10:11]
+; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 40, v1
+; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6)
+; GFX6-GISEL-NEXT: ds_write_b64 v0, v[12:13]
+; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 48, v1
+; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6)
+; GFX6-GISEL-NEXT: ds_write_b64 v0, v[14:15]
+; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 56, v1
+; GFX6-GISEL-NEXT: ds_write_b64 v1, v[2:3]
+; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(7)
+; GFX6-GISEL-NEXT: ds_write_b64 v0, v[16:17]
+; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v16p3:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 m0, -1
+; GFX7-SDAG-NEXT: ds_read_b128 v[2:5], v0 offset:32
+; GFX7-SDAG-NEXT: ds_read_b128 v[6:9], v0 offset:48
+; GFX7-SDAG-NEXT: ds_read_b128 v[10:13], v0
+; GFX7-SDAG-NEXT: ds_read_b128 v[14:17], v0 offset:16
+; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(3)
+; GFX7-SDAG-NEXT: ds_write_b128 v1, v[2:5] offset:32
+; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(3)
+; GFX7-SDAG-NEXT: ds_write_b128 v1, v[6:9] offset:48
+; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(3)
+; GFX7-SDAG-NEXT: ds_write_b128 v1, v[10:13]
+; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(3)
+; GFX7-SDAG-NEXT: ds_write_b128 v1, v[14:17] offset:16
+; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v16p3:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 m0, -1
+; GFX7-GISEL-NEXT: ds_read_b128 v[2:5], v0
+; GFX7-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16
+; GFX7-GISEL-NEXT: ds_read_b128 v[10:13], v0 offset:32
+; GFX7-GISEL-NEXT: ds_read_b128 v[14:17], v0 offset:48
+; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(3)
+; GFX7-GISEL-NEXT: ds_write_b128 v1, v[2:5]
+; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(3)
+; GFX7-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16
+; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(3)
+; GFX7-GISEL-NEXT: ds_write_b128 v1, v[10:13] offset:32
+; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(3)
+; GFX7-GISEL-NEXT: ds_write_b128 v1, v[14:17] offset:48
+; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v16p3:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: s_mov_b32 m0, -1
+; GFX8-GISEL-NEXT: ds_read_b128 v[2:5], v0
+; GFX8-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16
+; GFX8-GISEL-NEXT: ds_read_b128 v[10:13], v0 offset:32
+; GFX8-GISEL-NEXT: ds_read_b128 v[14:17], v0 offset:48
+; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(3)
+; GFX8-GISEL-NEXT: ds_write_b128 v1, v[2:5]
+; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(3)
+; GFX8-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16
+; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(3)
+; GFX8-GISEL-NEXT: ds_write_b128 v1, v[10:13] offset:32
+; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(3)
+; GFX8-GISEL-NEXT: ds_write_b128 v1, v[14:17] offset:48
+; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v16p3:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: ds_read_b128 v[2:5], v0
+; GFX9-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16
+; GFX9-GISEL-NEXT: ds_read_b128 v[10:13], v0 offset:32
+; GFX9-GISEL-NEXT: ds_read_b128 v[14:17], v0 offset:48
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(3)
+; GFX9-GISEL-NEXT: ds_write_b128 v1, v[2:5]
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(3)
+; GFX9-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(3)
+; GFX9-GISEL-NEXT: ds_write_b128 v1, v[10:13] offset:32
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(3)
+; GFX9-GISEL-NEXT: ds_write_b128 v1, v[14:17] offset:48
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v16p3:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3546,6 +10728,42 @@ define void @freeze_v16p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) {
}
define void @freeze_p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) {
+; GFX6-LABEL: freeze_p5:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-LABEL: freeze_p5:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_p5:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_p5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_p5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3568,6 +10786,88 @@ define void @freeze_p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) {
}
define void @freeze_v2p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v2p5:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 4, v0
+; GFX6-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 4, v1
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v2p5:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 4, v0
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, 4, v1
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v2p5:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 4, v0
+; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, 4, v1
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v2p5:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, 4, v0
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: v_add_i32_e32 v1, vcc, 4, v1
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v2p5:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 4, v0
+; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 4, v1
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v2p5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
+; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4
+; GFX9-NEXT: s_waitcnt vmcnt(1)
+; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
+; GFX9-NEXT: s_waitcnt vmcnt(1)
+; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v2p5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3594,6 +10894,114 @@ define void @freeze_v2p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) {
}
define void @freeze_v3p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v3p5:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 4, v0
+; GFX6-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v0
+; GFX6-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 4, v1
+; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 8, v1
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX6-SDAG-NEXT: buffer_store_dword v0, v5, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v3p5:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 4, v1
+; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 8, v1
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX6-GISEL-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX6-GISEL-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v3p5:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 4, v0
+; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v0
+; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, 4, v1
+; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 8, v1
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX7-SDAG-NEXT: buffer_store_dword v0, v5, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v3p5:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: v_add_i32_e32 v4, vcc, 4, v1
+; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 8, v1
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX7-GISEL-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX7-GISEL-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v3p5:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 4, v0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 8, v0
+; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 4, v1
+; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 8, v1
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX8-GISEL-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX8-GISEL-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v3p5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
+; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4
+; GFX9-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8
+; GFX9-NEXT: s_waitcnt vmcnt(2)
+; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
+; GFX9-NEXT: s_waitcnt vmcnt(2)
+; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4
+; GFX9-NEXT: s_waitcnt vmcnt(2)
+; GFX9-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v3p5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3623,6 +11031,140 @@ define void @freeze_v3p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) {
}
define void @freeze_v4p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v4p5:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 8, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 4, v0
+; GFX6-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 12, v0
+; GFX6-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 4, v1
+; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 8, v1
+; GFX6-SDAG-NEXT: v_add_i32_e32 v7, vcc, 12, v1
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_store_dword v2, v6, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX6-SDAG-NEXT: buffer_store_dword v0, v7, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v4p5:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 12, v0
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 4, v1
+; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 8, v1
+; GFX6-GISEL-NEXT: v_add_i32_e32 v7, vcc, 12, v1
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dword v2, v5, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dword v3, v6, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX6-GISEL-NEXT: buffer_store_dword v4, v7, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v4p5:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 8, v0
+; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, 4, v0
+; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 12, v0
+; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 4, v1
+; GFX7-SDAG-NEXT: v_add_i32_e32 v6, vcc, 8, v1
+; GFX7-SDAG-NEXT: v_add_i32_e32 v7, vcc, 12, v1
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_store_dword v2, v6, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX7-SDAG-NEXT: buffer_store_dword v0, v7, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v4p5:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v4, vcc, 12, v0
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 4, v1
+; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 8, v1
+; GFX7-GISEL-NEXT: v_add_i32_e32 v7, vcc, 12, v1
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dword v2, v5, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dword v3, v6, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX7-GISEL-NEXT: buffer_store_dword v4, v7, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v4p5:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 4, v0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 8, v0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 12, v0
+; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 4, v1
+; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 8, v1
+; GFX8-GISEL-NEXT: v_add_u32_e32 v7, vcc, 12, v1
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: buffer_store_dword v2, v5, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: buffer_store_dword v3, v6, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX8-GISEL-NEXT: buffer_store_dword v4, v7, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v4p5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
+; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4
+; GFX9-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8
+; GFX9-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12
+; GFX9-NEXT: s_waitcnt vmcnt(3)
+; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
+; GFX9-NEXT: s_waitcnt vmcnt(3)
+; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4
+; GFX9-NEXT: s_waitcnt vmcnt(3)
+; GFX9-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8
+; GFX9-NEXT: s_waitcnt vmcnt(3)
+; GFX9-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:12
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v4p5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3655,6 +11197,244 @@ define void @freeze_v4p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) {
}
define void @freeze_v8p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v8p5:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 24, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 20, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 16, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 12, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 8, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v7, vcc, 4, v0
+; GFX6-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 28, v0
+; GFX6-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: v_add_i32_e32 v9, vcc, 4, v1
+; GFX6-SDAG-NEXT: v_add_i32_e32 v10, vcc, 8, v1
+; GFX6-SDAG-NEXT: v_add_i32_e32 v11, vcc, 12, v1
+; GFX6-SDAG-NEXT: v_add_i32_e32 v12, vcc, 16, v1
+; GFX6-SDAG-NEXT: v_add_i32_e32 v13, vcc, 20, v1
+; GFX6-SDAG-NEXT: v_add_i32_e32 v14, vcc, 24, v1
+; GFX6-SDAG-NEXT: v_add_i32_e32 v15, vcc, 28, v1
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX6-SDAG-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_store_dword v7, v9, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_store_dword v6, v10, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_store_dword v5, v11, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_store_dword v4, v12, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_store_dword v3, v13, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_store_dword v2, v14, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dword v0, v15, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v8p5:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 12, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 16, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 20, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v7, vcc, 24, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v8, vcc, 28, v0
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: v_add_i32_e32 v9, vcc, 4, v1
+; GFX6-GISEL-NEXT: v_add_i32_e32 v10, vcc, 8, v1
+; GFX6-GISEL-NEXT: v_add_i32_e32 v11, vcc, 12, v1
+; GFX6-GISEL-NEXT: v_add_i32_e32 v12, vcc, 16, v1
+; GFX6-GISEL-NEXT: v_add_i32_e32 v13, vcc, 20, v1
+; GFX6-GISEL-NEXT: v_add_i32_e32 v14, vcc, 24, v1
+; GFX6-GISEL-NEXT: v_add_i32_e32 v15, vcc, 28, v1
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dword v2, v9, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dword v3, v10, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dword v4, v11, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dword v5, v12, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dword v6, v13, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dword v7, v14, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX6-GISEL-NEXT: buffer_store_dword v8, v15, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v8p5:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 24, v0
+; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, 20, v0
+; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, 16, v0
+; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 12, v0
+; GFX7-SDAG-NEXT: v_add_i32_e32 v6, vcc, 8, v0
+; GFX7-SDAG-NEXT: v_add_i32_e32 v7, vcc, 4, v0
+; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 28, v0
+; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: v_add_i32_e32 v9, vcc, 4, v1
+; GFX7-SDAG-NEXT: v_add_i32_e32 v10, vcc, 8, v1
+; GFX7-SDAG-NEXT: v_add_i32_e32 v11, vcc, 12, v1
+; GFX7-SDAG-NEXT: v_add_i32_e32 v12, vcc, 16, v1
+; GFX7-SDAG-NEXT: v_add_i32_e32 v13, vcc, 20, v1
+; GFX7-SDAG-NEXT: v_add_i32_e32 v14, vcc, 24, v1
+; GFX7-SDAG-NEXT: v_add_i32_e32 v15, vcc, 28, v1
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX7-SDAG-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_store_dword v7, v9, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_store_dword v6, v10, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_store_dword v5, v11, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_store_dword v4, v12, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_store_dword v3, v13, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_store_dword v2, v14, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dword v0, v15, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v8p5:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v4, vcc, 12, v0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 16, v0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 20, v0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v7, vcc, 24, v0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v8, vcc, 28, v0
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: v_add_i32_e32 v9, vcc, 4, v1
+; GFX7-GISEL-NEXT: v_add_i32_e32 v10, vcc, 8, v1
+; GFX7-GISEL-NEXT: v_add_i32_e32 v11, vcc, 12, v1
+; GFX7-GISEL-NEXT: v_add_i32_e32 v12, vcc, 16, v1
+; GFX7-GISEL-NEXT: v_add_i32_e32 v13, vcc, 20, v1
+; GFX7-GISEL-NEXT: v_add_i32_e32 v14, vcc, 24, v1
+; GFX7-GISEL-NEXT: v_add_i32_e32 v15, vcc, 28, v1
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dword v2, v9, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dword v3, v10, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dword v4, v11, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dword v5, v12, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dword v6, v13, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dword v7, v14, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX7-GISEL-NEXT: buffer_store_dword v8, v15, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v8p5:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 4, v0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 8, v0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 12, v0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 20, v0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v7, vcc, 24, v0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 28, v0
+; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: v_add_u32_e32 v9, vcc, 4, v1
+; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 8, v1
+; GFX8-GISEL-NEXT: v_add_u32_e32 v11, vcc, 12, v1
+; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 16, v1
+; GFX8-GISEL-NEXT: v_add_u32_e32 v13, vcc, 20, v1
+; GFX8-GISEL-NEXT: v_add_u32_e32 v14, vcc, 24, v1
+; GFX8-GISEL-NEXT: v_add_u32_e32 v15, vcc, 28, v1
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: buffer_store_dword v2, v9, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: buffer_store_dword v3, v10, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: buffer_store_dword v4, v11, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: buffer_store_dword v5, v12, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: buffer_store_dword v6, v13, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: buffer_store_dword v7, v14, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX8-GISEL-NEXT: buffer_store_dword v8, v15, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v8p5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
+; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4
+; GFX9-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8
+; GFX9-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12
+; GFX9-NEXT: buffer_load_dword v6, v0, s[0:3], 0 offen offset:16
+; GFX9-NEXT: buffer_load_dword v7, v0, s[0:3], 0 offen offset:20
+; GFX9-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen offset:24
+; GFX9-NEXT: buffer_load_dword v9, v0, s[0:3], 0 offen offset:28
+; GFX9-NEXT: s_waitcnt vmcnt(7)
+; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
+; GFX9-NEXT: s_waitcnt vmcnt(7)
+; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4
+; GFX9-NEXT: s_waitcnt vmcnt(7)
+; GFX9-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8
+; GFX9-NEXT: s_waitcnt vmcnt(7)
+; GFX9-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:12
+; GFX9-NEXT: s_waitcnt vmcnt(7)
+; GFX9-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(7)
+; GFX9-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen offset:20
+; GFX9-NEXT: s_waitcnt vmcnt(7)
+; GFX9-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen offset:24
+; GFX9-NEXT: s_waitcnt vmcnt(7)
+; GFX9-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen offset:28
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v8p5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3717,6 +11497,446 @@ define void @freeze_v8p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) {
}
define void @freeze_v16p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v16p5:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 16, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 12, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v7, vcc, 8, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v8, vcc, 4, v0
+; GFX6-SDAG-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 56, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 52, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 48, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v9, vcc, 44, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v10, vcc, 40, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v11, vcc, 36, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v12, vcc, 28, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v13, vcc, 24, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v14, vcc, 20, v0
+; GFX6-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v9, v9, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v15, v0, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: v_add_i32_e32 v16, vcc, 32, v0
+; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 60, v0
+; GFX6-SDAG-NEXT: buffer_load_dword v16, v16, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: v_add_i32_e32 v17, vcc, 4, v1
+; GFX6-SDAG-NEXT: v_add_i32_e32 v18, vcc, 8, v1
+; GFX6-SDAG-NEXT: v_add_i32_e32 v19, vcc, 12, v1
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(14)
+; GFX6-SDAG-NEXT: buffer_store_dword v6, v19, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(14)
+; GFX6-SDAG-NEXT: buffer_store_dword v7, v18, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(14)
+; GFX6-SDAG-NEXT: buffer_store_dword v8, v17, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: s_waitcnt expcnt(0)
+; GFX6-SDAG-NEXT: v_add_i32_e32 v8, vcc, 16, v1
+; GFX6-SDAG-NEXT: buffer_store_dword v5, v8, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: s_waitcnt expcnt(0)
+; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 40, v1
+; GFX6-SDAG-NEXT: v_add_i32_e32 v17, vcc, 20, v1
+; GFX6-SDAG-NEXT: v_add_i32_e32 v7, vcc, 24, v1
+; GFX6-SDAG-NEXT: v_add_i32_e32 v18, vcc, 28, v1
+; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 32, v1
+; GFX6-SDAG-NEXT: v_add_i32_e32 v19, vcc, 36, v1
+; GFX6-SDAG-NEXT: v_add_i32_e32 v8, vcc, 44, v1
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(9)
+; GFX6-SDAG-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX6-SDAG-NEXT: buffer_store_dword v14, v17, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_store_dword v13, v7, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_store_dword v12, v18, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(9)
+; GFX6-SDAG-NEXT: buffer_store_dword v16, v6, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_store_dword v10, v5, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: buffer_store_dword v9, v8, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 48, v1
+; GFX6-SDAG-NEXT: buffer_store_dword v4, v5, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: s_waitcnt expcnt(0)
+; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 52, v1
+; GFX6-SDAG-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: s_waitcnt expcnt(0)
+; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 56, v1
+; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, 60, v1
+; GFX6-SDAG-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(14)
+; GFX6-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v16p5:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0
+; GFX6-GISEL-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 12, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 16, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v7, vcc, 20, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v8, vcc, 24, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v9, vcc, 28, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v10, vcc, 32, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v11, vcc, 36, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v12, vcc, 40, v0
+; GFX6-GISEL-NEXT: v_add_i32_e32 v13, vcc, 44, v0
+; GFX6-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v9, v9, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: v_add_i32_e32 v14, vcc, 48, v0
+; GFX6-GISEL-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: v_add_i32_e32 v15, vcc, 52, v0
+; GFX6-GISEL-NEXT: buffer_load_dword v15, v15, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: v_add_i32_e32 v16, vcc, 56, v0
+; GFX6-GISEL-NEXT: buffer_load_dword v16, v16, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 60, v0
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: v_add_i32_e32 v17, vcc, 4, v1
+; GFX6-GISEL-NEXT: v_add_i32_e32 v18, vcc, 8, v1
+; GFX6-GISEL-NEXT: v_add_i32_e32 v19, vcc, 12, v1
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX6-GISEL-NEXT: buffer_store_dword v2, v17, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt expcnt(0)
+; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 16, v1
+; GFX6-GISEL-NEXT: v_add_i32_e32 v17, vcc, 20, v1
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(12)
+; GFX6-GISEL-NEXT: buffer_store_dword v6, v2, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 40, v1
+; GFX6-GISEL-NEXT: buffer_store_dword v3, v18, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt expcnt(0)
+; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 24, v1
+; GFX6-GISEL-NEXT: v_add_i32_e32 v18, vcc, 28, v1
+; GFX6-GISEL-NEXT: buffer_store_dword v5, v19, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt expcnt(0)
+; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 32, v1
+; GFX6-GISEL-NEXT: v_add_i32_e32 v19, vcc, 36, v1
+; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 44, v1
+; GFX6-GISEL-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX6-GISEL-NEXT: buffer_store_dword v7, v17, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_store_dword v8, v3, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX6-GISEL-NEXT: buffer_store_dword v9, v18, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_store_dword v10, v5, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX6-GISEL-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX6-GISEL-NEXT: buffer_store_dword v13, v6, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 48, v1
+; GFX6-GISEL-NEXT: buffer_store_dword v14, v2, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 52, v1
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX6-GISEL-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 56, v1
+; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, 60, v1
+; GFX6-GISEL-NEXT: buffer_store_dword v16, v2, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v16p5:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 16, v0
+; GFX7-SDAG-NEXT: v_add_i32_e32 v6, vcc, 12, v0
+; GFX7-SDAG-NEXT: v_add_i32_e32 v7, vcc, 8, v0
+; GFX7-SDAG-NEXT: v_add_i32_e32 v8, vcc, 4, v0
+; GFX7-SDAG-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 56, v0
+; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, 52, v0
+; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, 48, v0
+; GFX7-SDAG-NEXT: v_add_i32_e32 v9, vcc, 44, v0
+; GFX7-SDAG-NEXT: v_add_i32_e32 v10, vcc, 40, v0
+; GFX7-SDAG-NEXT: v_add_i32_e32 v11, vcc, 36, v0
+; GFX7-SDAG-NEXT: v_add_i32_e32 v12, vcc, 28, v0
+; GFX7-SDAG-NEXT: v_add_i32_e32 v13, vcc, 24, v0
+; GFX7-SDAG-NEXT: v_add_i32_e32 v14, vcc, 20, v0
+; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v9, v9, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v15, v0, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: v_add_i32_e32 v16, vcc, 32, v0
+; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 60, v0
+; GFX7-SDAG-NEXT: buffer_load_dword v16, v16, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: v_add_i32_e32 v17, vcc, 4, v1
+; GFX7-SDAG-NEXT: v_add_i32_e32 v18, vcc, 8, v1
+; GFX7-SDAG-NEXT: v_add_i32_e32 v19, vcc, 12, v1
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(14)
+; GFX7-SDAG-NEXT: buffer_store_dword v6, v19, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(14)
+; GFX7-SDAG-NEXT: buffer_store_dword v7, v18, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(14)
+; GFX7-SDAG-NEXT: buffer_store_dword v8, v17, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: v_add_i32_e32 v8, vcc, 16, v1
+; GFX7-SDAG-NEXT: buffer_store_dword v5, v8, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 40, v1
+; GFX7-SDAG-NEXT: v_add_i32_e32 v17, vcc, 20, v1
+; GFX7-SDAG-NEXT: v_add_i32_e32 v7, vcc, 24, v1
+; GFX7-SDAG-NEXT: v_add_i32_e32 v18, vcc, 28, v1
+; GFX7-SDAG-NEXT: v_add_i32_e32 v6, vcc, 32, v1
+; GFX7-SDAG-NEXT: v_add_i32_e32 v19, vcc, 36, v1
+; GFX7-SDAG-NEXT: v_add_i32_e32 v8, vcc, 44, v1
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(9)
+; GFX7-SDAG-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX7-SDAG-NEXT: buffer_store_dword v14, v17, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_store_dword v13, v7, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_store_dword v12, v18, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(9)
+; GFX7-SDAG-NEXT: buffer_store_dword v16, v6, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_store_dword v10, v5, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: buffer_store_dword v9, v8, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 48, v1
+; GFX7-SDAG-NEXT: buffer_store_dword v4, v5, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, 52, v1
+; GFX7-SDAG-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, 56, v1
+; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, 60, v1
+; GFX7-SDAG-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(14)
+; GFX7-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v16p5:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0
+; GFX7-GISEL-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 12, v0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 16, v0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v7, vcc, 20, v0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v8, vcc, 24, v0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v9, vcc, 28, v0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v10, vcc, 32, v0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v11, vcc, 36, v0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v12, vcc, 40, v0
+; GFX7-GISEL-NEXT: v_add_i32_e32 v13, vcc, 44, v0
+; GFX7-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v9, v9, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: v_add_i32_e32 v14, vcc, 48, v0
+; GFX7-GISEL-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: v_add_i32_e32 v15, vcc, 52, v0
+; GFX7-GISEL-NEXT: buffer_load_dword v15, v15, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: v_add_i32_e32 v16, vcc, 56, v0
+; GFX7-GISEL-NEXT: buffer_load_dword v16, v16, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, 60, v0
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: v_add_i32_e32 v17, vcc, 4, v1
+; GFX7-GISEL-NEXT: v_add_i32_e32 v18, vcc, 8, v1
+; GFX7-GISEL-NEXT: v_add_i32_e32 v19, vcc, 12, v1
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX7-GISEL-NEXT: buffer_store_dword v2, v17, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 16, v1
+; GFX7-GISEL-NEXT: v_add_i32_e32 v17, vcc, 20, v1
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(12)
+; GFX7-GISEL-NEXT: buffer_store_dword v6, v2, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 40, v1
+; GFX7-GISEL-NEXT: buffer_store_dword v3, v18, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 24, v1
+; GFX7-GISEL-NEXT: v_add_i32_e32 v18, vcc, 28, v1
+; GFX7-GISEL-NEXT: buffer_store_dword v5, v19, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 32, v1
+; GFX7-GISEL-NEXT: v_add_i32_e32 v19, vcc, 36, v1
+; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 44, v1
+; GFX7-GISEL-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX7-GISEL-NEXT: buffer_store_dword v7, v17, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_store_dword v8, v3, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX7-GISEL-NEXT: buffer_store_dword v9, v18, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_store_dword v10, v5, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX7-GISEL-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX7-GISEL-NEXT: buffer_store_dword v13, v6, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 48, v1
+; GFX7-GISEL-NEXT: buffer_store_dword v14, v2, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 52, v1
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX7-GISEL-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 56, v1
+; GFX7-GISEL-NEXT: v_add_i32_e32 v1, vcc, 60, v1
+; GFX7-GISEL-NEXT: buffer_store_dword v16, v2, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v16p5:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 4, v0
+; GFX8-GISEL-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 8, v0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 12, v0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v7, vcc, 20, v0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 24, v0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v9, vcc, 28, v0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 32, v0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v11, vcc, 36, v0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 40, v0
+; GFX8-GISEL-NEXT: v_add_u32_e32 v13, vcc, 44, v0
+; GFX8-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v9, v9, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: v_add_u32_e32 v14, vcc, 48, v0
+; GFX8-GISEL-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: v_add_u32_e32 v15, vcc, 52, v0
+; GFX8-GISEL-NEXT: buffer_load_dword v15, v15, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 56, v0
+; GFX8-GISEL-NEXT: buffer_load_dword v16, v16, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 60, v0
+; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: v_add_u32_e32 v17, vcc, 4, v1
+; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 8, v1
+; GFX8-GISEL-NEXT: v_add_u32_e32 v19, vcc, 12, v1
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX8-GISEL-NEXT: buffer_store_dword v2, v17, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v1
+; GFX8-GISEL-NEXT: v_add_u32_e32 v17, vcc, 20, v1
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(12)
+; GFX8-GISEL-NEXT: buffer_store_dword v6, v2, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 40, v1
+; GFX8-GISEL-NEXT: buffer_store_dword v3, v18, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 24, v1
+; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 28, v1
+; GFX8-GISEL-NEXT: buffer_store_dword v5, v19, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 32, v1
+; GFX8-GISEL-NEXT: v_add_u32_e32 v19, vcc, 36, v1
+; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 44, v1
+; GFX8-GISEL-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX8-GISEL-NEXT: buffer_store_dword v7, v17, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_store_dword v8, v3, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX8-GISEL-NEXT: buffer_store_dword v9, v18, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_store_dword v10, v5, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX8-GISEL-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX8-GISEL-NEXT: buffer_store_dword v13, v6, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v1
+; GFX8-GISEL-NEXT: buffer_store_dword v14, v2, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 52, v1
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX8-GISEL-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 56, v1
+; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 60, v1
+; GFX8-GISEL-NEXT: buffer_store_dword v16, v2, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14)
+; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_v16p5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
+; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4
+; GFX9-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8
+; GFX9-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12
+; GFX9-NEXT: buffer_load_dword v6, v0, s[0:3], 0 offen offset:16
+; GFX9-NEXT: buffer_load_dword v7, v0, s[0:3], 0 offen offset:20
+; GFX9-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen offset:24
+; GFX9-NEXT: buffer_load_dword v9, v0, s[0:3], 0 offen offset:28
+; GFX9-NEXT: buffer_load_dword v10, v0, s[0:3], 0 offen offset:32
+; GFX9-NEXT: buffer_load_dword v11, v0, s[0:3], 0 offen offset:36
+; GFX9-NEXT: buffer_load_dword v12, v0, s[0:3], 0 offen offset:40
+; GFX9-NEXT: buffer_load_dword v13, v0, s[0:3], 0 offen offset:44
+; GFX9-NEXT: buffer_load_dword v14, v0, s[0:3], 0 offen offset:48
+; GFX9-NEXT: buffer_load_dword v15, v0, s[0:3], 0 offen offset:52
+; GFX9-NEXT: buffer_load_dword v16, v0, s[0:3], 0 offen offset:56
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:60
+; GFX9-NEXT: s_waitcnt vmcnt(15)
+; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
+; GFX9-NEXT: s_waitcnt vmcnt(15)
+; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4
+; GFX9-NEXT: s_waitcnt vmcnt(15)
+; GFX9-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8
+; GFX9-NEXT: s_waitcnt vmcnt(15)
+; GFX9-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:12
+; GFX9-NEXT: s_waitcnt vmcnt(15)
+; GFX9-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(15)
+; GFX9-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen offset:20
+; GFX9-NEXT: s_waitcnt vmcnt(15)
+; GFX9-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen offset:24
+; GFX9-NEXT: s_waitcnt vmcnt(15)
+; GFX9-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen offset:28
+; GFX9-NEXT: s_waitcnt vmcnt(15)
+; GFX9-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen offset:32
+; GFX9-NEXT: s_waitcnt vmcnt(15)
+; GFX9-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen offset:36
+; GFX9-NEXT: s_waitcnt vmcnt(15)
+; GFX9-NEXT: buffer_store_dword v12, v1, s[0:3], 0 offen offset:40
+; GFX9-NEXT: s_waitcnt vmcnt(15)
+; GFX9-NEXT: buffer_store_dword v13, v1, s[0:3], 0 offen offset:44
+; GFX9-NEXT: s_waitcnt vmcnt(15)
+; GFX9-NEXT: buffer_store_dword v14, v1, s[0:3], 0 offen offset:48
+; GFX9-NEXT: s_waitcnt vmcnt(15)
+; GFX9-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen offset:52
+; GFX9-NEXT: s_waitcnt vmcnt(15)
+; GFX9-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen offset:56
+; GFX9-NEXT: s_waitcnt vmcnt(15)
+; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen offset:60
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_v16p5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3819,6 +12039,74 @@ define void @freeze_v16p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) {
}
define void @freeze_i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_i8:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_i8:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_i8:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_i8:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_i8:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: flat_store_byte v[2:3], v0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_i8:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_byte v[2:3], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3841,6 +12129,92 @@ define void @freeze_i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v2i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v2i8:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v2i8:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
+; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
+; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v2i8:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v2i8:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
+; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
+; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v2i8:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_ushort v0, v[0:1]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
+; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
+; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1
+; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: flat_store_short v[2:3], v0
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v2i8:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_ushort v0, v[0:1], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
+; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
+; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1
+; GFX9-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v2i8:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3888,6 +12262,137 @@ define void @freeze_v2i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v3i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v3i8:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v4, 8, v0
+; GFX6-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
+; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX6-SDAG-NEXT: buffer_store_byte v1, v[2:3], s[4:7], 0 addr64 offset:2
+; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v3i8:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
+; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1
+; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v4
+; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_store_byte v1, v[2:3], s[4:7], 0 addr64 offset:2
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v3i8:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v4, 8, v0
+; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
+; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX7-SDAG-NEXT: buffer_store_byte v1, v[2:3], s[4:7], 0 addr64 offset:2
+; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v3i8:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
+; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1
+; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v4
+; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_store_byte v1, v[2:3], s[4:7], 0 addr64 offset:2
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v3i8:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dword v4, v[0:1]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0xff
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 2, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v6, 8, v4
+; GFX8-GISEL-NEXT: v_and_b32_sdwa v5, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6
+; GFX8-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
+; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v6, 8, v6
+; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
+; GFX8-GISEL-NEXT: v_or_b32_sdwa v4, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4
+; GFX8-GISEL-NEXT: flat_store_short v[2:3], v4
+; GFX8-GISEL-NEXT: flat_store_byte v[0:1], v5
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v3i8:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0xff
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v0
+; GFX9-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
+; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v4
+; GFX9-GISEL-NEXT: v_and_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off
+; GFX9-GISEL-NEXT: global_store_byte_d16_hi v[2:3], v0, off offset:2
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v3i8:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3959,6 +12464,117 @@ define void @freeze_v3i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v4i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v4i8:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v4i8:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v5, 24, v0
+; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1
+; GFX6-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v5
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v4i8:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v4i8:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v5, 24, v0
+; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1
+; GFX7-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v5
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v4i8:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dword v0, v[0:1]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 8
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0xff
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v0
+; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX8-GISEL-NEXT: v_and_b32_sdwa v6, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_sdwa v4, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v4, v6
+; GFX8-GISEL-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-GISEL-NEXT: flat_store_dword v[2:3], v0
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v4i8:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 8
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0xff
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v0
+; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX9-GISEL-NEXT: v_and_b32_sdwa v6, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_and_or_b32 v4, v0, v1, v4
+; GFX9-GISEL-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_or3_b32 v0, v4, v6, v0
+; GFX9-GISEL-NEXT: global_store_dword v[2:3], v0, off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v4i8:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4016,6 +12632,156 @@ define void @freeze_v4i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v8i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v8i8:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v8i8:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v0
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v1
+; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
+; GFX6-GISEL-NEXT: v_and_b32_e32 v7, 0xff, v7
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v6, 24, v0
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v9, 24, v1
+; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
+; GFX6-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5
+; GFX6-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v7, 8, v7
+; GFX6-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6
+; GFX6-GISEL-NEXT: v_and_b32_e32 v9, 0xff, v9
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX6-GISEL-NEXT: v_or_b32_e32 v1, v1, v7
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v6, 24, v6
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v9, 24, v9
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v5
+; GFX6-GISEL-NEXT: v_or_b32_e32 v1, v1, v8
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v6
+; GFX6-GISEL-NEXT: v_or_b32_e32 v1, v1, v9
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v8i8:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v8i8:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v0
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v1
+; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
+; GFX7-GISEL-NEXT: v_and_b32_e32 v7, 0xff, v7
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v6, 24, v0
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v9, 24, v1
+; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
+; GFX7-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5
+; GFX7-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v7, 8, v7
+; GFX7-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6
+; GFX7-GISEL-NEXT: v_and_b32_e32 v9, 0xff, v9
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX7-GISEL-NEXT: v_or_b32_e32 v1, v1, v7
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v6, 24, v6
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v9, 24, v9
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v5
+; GFX7-GISEL-NEXT: v_or_b32_e32 v1, v1, v8
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v6
+; GFX7-GISEL-NEXT: v_or_b32_e32 v1, v1, v9
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v8i8:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 8
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xff
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v6, 8, v0
+; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1
+; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v6, v5, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v5, v5, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX8-GISEL-NEXT: v_and_b32_sdwa v8, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_and_b32_sdwa v9, v0, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_and_b32_sdwa v10, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_and_b32_sdwa v4, v1, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v8
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v10
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v9
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v4
+; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v8i8:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 8
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xff
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v6, 8, v0
+; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1
+; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v6, v5, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v5, v5, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX9-GISEL-NEXT: v_and_b32_sdwa v8, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_and_b32_sdwa v9, v0, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_and_b32_sdwa v10, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_and_b32_sdwa v11, v1, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_and_or_b32 v0, v0, v4, v6
+; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, v4, v5
+; GFX9-GISEL-NEXT: v_or3_b32 v0, v0, v8, v9
+; GFX9-GISEL-NEXT: v_or3_b32 v1, v1, v10, v11
+; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v8i8:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4090,6 +12856,234 @@ define void @freeze_v8i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v16i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v16i8:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v16i8:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v0, 8, v4
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v12, 8, v6
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v15, 8, v7
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v5
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v13, 16, v6
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v16, 16, v7
+; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX6-GISEL-NEXT: v_and_b32_e32 v9, 0xff, v9
+; GFX6-GISEL-NEXT: v_and_b32_e32 v12, 0xff, v12
+; GFX6-GISEL-NEXT: v_and_b32_e32 v15, 0xff, v15
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v8, 24, v4
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v11, 24, v5
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v14, 24, v6
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v17, 24, v7
+; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
+; GFX6-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5
+; GFX6-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6
+; GFX6-GISEL-NEXT: v_and_b32_e32 v7, 0xff, v7
+; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
+; GFX6-GISEL-NEXT: v_and_b32_e32 v10, 0xff, v10
+; GFX6-GISEL-NEXT: v_and_b32_e32 v13, 0xff, v13
+; GFX6-GISEL-NEXT: v_and_b32_e32 v16, 0xff, v16
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v9, 8, v9
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v12, 8, v12
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v15, 8, v15
+; GFX6-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8
+; GFX6-GISEL-NEXT: v_and_b32_e32 v11, 0xff, v11
+; GFX6-GISEL-NEXT: v_and_b32_e32 v14, 0xff, v14
+; GFX6-GISEL-NEXT: v_and_b32_e32 v17, 0xff, v17
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v13, 16, v13
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v16, 16, v16
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX6-GISEL-NEXT: v_or_b32_e32 v4, v5, v9
+; GFX6-GISEL-NEXT: v_or_b32_e32 v5, v6, v12
+; GFX6-GISEL-NEXT: v_or_b32_e32 v6, v7, v15
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v8, 24, v8
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v11, 24, v11
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v14, 24, v14
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v17, 24, v17
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-GISEL-NEXT: v_or_b32_e32 v1, v4, v10
+; GFX6-GISEL-NEXT: v_or_b32_e32 v7, v5, v13
+; GFX6-GISEL-NEXT: v_or_b32_e32 v9, v6, v16
+; GFX6-GISEL-NEXT: v_or_b32_e32 v4, v0, v8
+; GFX6-GISEL-NEXT: v_or_b32_e32 v5, v1, v11
+; GFX6-GISEL-NEXT: v_or_b32_e32 v6, v7, v14
+; GFX6-GISEL-NEXT: v_or_b32_e32 v7, v9, v17
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v16i8:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v16i8:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v0, 8, v4
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v12, 8, v6
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v15, 8, v7
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v5
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v13, 16, v6
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v16, 16, v7
+; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX7-GISEL-NEXT: v_and_b32_e32 v9, 0xff, v9
+; GFX7-GISEL-NEXT: v_and_b32_e32 v12, 0xff, v12
+; GFX7-GISEL-NEXT: v_and_b32_e32 v15, 0xff, v15
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v8, 24, v4
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v11, 24, v5
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v14, 24, v6
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v17, 24, v7
+; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4
+; GFX7-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5
+; GFX7-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6
+; GFX7-GISEL-NEXT: v_and_b32_e32 v7, 0xff, v7
+; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
+; GFX7-GISEL-NEXT: v_and_b32_e32 v10, 0xff, v10
+; GFX7-GISEL-NEXT: v_and_b32_e32 v13, 0xff, v13
+; GFX7-GISEL-NEXT: v_and_b32_e32 v16, 0xff, v16
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v9, 8, v9
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v12, 8, v12
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v15, 8, v15
+; GFX7-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8
+; GFX7-GISEL-NEXT: v_and_b32_e32 v11, 0xff, v11
+; GFX7-GISEL-NEXT: v_and_b32_e32 v14, 0xff, v14
+; GFX7-GISEL-NEXT: v_and_b32_e32 v17, 0xff, v17
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v13, 16, v13
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v16, 16, v16
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX7-GISEL-NEXT: v_or_b32_e32 v4, v5, v9
+; GFX7-GISEL-NEXT: v_or_b32_e32 v5, v6, v12
+; GFX7-GISEL-NEXT: v_or_b32_e32 v6, v7, v15
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v8, 24, v8
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v11, 24, v11
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v14, 24, v14
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v17, 24, v17
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_or_b32_e32 v1, v4, v10
+; GFX7-GISEL-NEXT: v_or_b32_e32 v7, v5, v13
+; GFX7-GISEL-NEXT: v_or_b32_e32 v9, v6, v16
+; GFX7-GISEL-NEXT: v_or_b32_e32 v4, v0, v8
+; GFX7-GISEL-NEXT: v_or_b32_e32 v5, v1, v11
+; GFX7-GISEL-NEXT: v_or_b32_e32 v6, v7, v14
+; GFX7-GISEL-NEXT: v_or_b32_e32 v7, v9, v17
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v16i8:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 8
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 0xff
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v8, 8, v4
+; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5
+; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v10, 8, v6
+; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v11, 8, v7
+; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v8, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v9, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v10, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v1, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX8-GISEL-NEXT: v_and_b32_sdwa v12, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_and_b32_sdwa v13, v4, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_and_b32_sdwa v14, v5, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_and_b32_sdwa v15, v5, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_and_b32_sdwa v16, v6, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_and_b32_sdwa v17, v6, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_and_b32_sdwa v18, v7, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_sdwa v4, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_sdwa v5, v5, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_sdwa v6, v6, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_and_b32_sdwa v0, v7, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v4, v12
+; GFX8-GISEL-NEXT: v_or_b32_e32 v5, v5, v14
+; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v6, v16
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v18
+; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v4, v13
+; GFX8-GISEL-NEXT: v_or_b32_e32 v5, v5, v15
+; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v6, v17
+; GFX8-GISEL-NEXT: v_or_b32_e32 v7, v1, v0
+; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v16i8:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 8
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0xff
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v8, 8, v4
+; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5
+; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v10, 8, v6
+; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v11, 8, v7
+; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v8, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v9, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v10, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v1, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX9-GISEL-NEXT: v_and_b32_sdwa v12, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_and_b32_sdwa v13, v4, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_and_b32_sdwa v14, v5, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_and_b32_sdwa v15, v5, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_and_b32_sdwa v16, v6, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_and_b32_sdwa v17, v6, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_and_b32_sdwa v18, v7, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_and_b32_sdwa v19, v7, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_and_or_b32 v4, v4, v0, v8
+; GFX9-GISEL-NEXT: v_and_or_b32 v5, v5, v0, v9
+; GFX9-GISEL-NEXT: v_and_or_b32 v6, v6, v0, v10
+; GFX9-GISEL-NEXT: v_and_or_b32 v0, v7, v0, v1
+; GFX9-GISEL-NEXT: v_or3_b32 v4, v4, v12, v13
+; GFX9-GISEL-NEXT: v_or3_b32 v5, v5, v14, v15
+; GFX9-GISEL-NEXT: v_or3_b32 v6, v6, v16, v17
+; GFX9-GISEL-NEXT: v_or3_b32 v7, v0, v18, v19
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v16i8:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4198,6 +13192,80 @@ define void @freeze_v16i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_i1:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_i1:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_i1:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_i1:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_i1:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX8-NEXT: flat_store_byte v[2:3], v0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_i1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX9-NEXT: global_store_byte v[2:3], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_i1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4222,6 +13290,100 @@ define void @freeze_i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v2i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v2i1:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 3, v0
+; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v2i1:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0
+; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 3, v0
+; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v2i1:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 3, v0
+; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v2i1:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0
+; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 3, v0
+; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v2i1:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0
+; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 3, v0
+; GFX8-GISEL-NEXT: flat_store_byte v[2:3], v0
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v2i1:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0
+; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1
+; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 3, v0
+; GFX9-GISEL-NEXT: global_store_byte v[2:3], v0, off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v2i1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4274,6 +13436,116 @@ define void @freeze_v2i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v3i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v3i1:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 7, v0
+; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v3i1:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0
+; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 2, v0
+; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v4
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 7, v0
+; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v3i1:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 7, v0
+; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v3i1:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0
+; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 2, v0
+; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v4
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 7, v0
+; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v3i1:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0
+; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v4, 2, v0
+; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX8-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
+; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 2, v4
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 7, v0
+; GFX8-GISEL-NEXT: flat_store_byte v[2:3], v0
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v3i1:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0
+; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v4, 2, v0
+; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX9-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
+; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1
+; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 2, v4
+; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 7, v0
+; GFX9-GISEL-NEXT: global_store_byte v[2:3], v0, off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v3i1:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4334,6 +13606,86 @@ define void @freeze_v3i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_i1_vcc:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_i1_vcc:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_i1_vcc:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_i1_vcc:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: freeze_i1_vcc:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: flat_load_dword v0, v[0:1]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT: flat_store_byte v[2:3], v0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: freeze_i1_vcc:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v0, v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-NEXT: global_store_byte v[2:3], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: freeze_i1_vcc:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4361,6 +13713,124 @@ define void @freeze_i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v2i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v2i1_vcc:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 3, v0
+; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v2i1_vcc:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 3, v0
+; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v2i1_vcc:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 3, v0
+; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v2i1_vcc:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 3, v0
+; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v2i1_vcc:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 3, v0
+; GFX8-GISEL-NEXT: flat_store_byte v[2:3], v0
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v2i1_vcc:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1
+; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 3, v0
+; GFX9-GISEL-NEXT: global_store_byte v[2:3], v0, off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v2i1_vcc:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4432,6 +13902,152 @@ define void @freeze_v2i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v3i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v3i1_vcc:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
+; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v1, v0
+; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v1, 2, v4
+; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 7, v0
+; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v3i1_vcc:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
+; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v4
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 7, v0
+; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v3i1_vcc:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v1, v0
+; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v1, 2, v4
+; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 7, v0
+; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v3i1_vcc:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v4
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 7, v0
+; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v3i1_vcc:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx3 v[4:6], v[0:1]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
+; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX8-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
+; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 2, v4
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 7, v0
+; GFX8-GISEL-NEXT: flat_store_byte v[2:3], v0
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v3i1_vcc:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx3 v[4:6], v[0:1], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX9-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
+; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1
+; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 2, v4
+; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 7, v0
+; GFX9-GISEL-NEXT: global_store_byte v[2:3], v0, off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v3i1_vcc:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4521,6 +14137,180 @@ define void @freeze_v3i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
}
define void @freeze_v4i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX6-SDAG-LABEL: freeze_v4i1_vcc:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
+; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
+; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v4, 2, v4
+; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v1, v0
+; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v1, 3, v5
+; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 15, v0
+; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: freeze_v4i1_vcc:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
+; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
+; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; GFX6-GISEL-NEXT: v_and_b32_e32 v5, 1, v5
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v4, 2, v4
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 3, v5
+; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 15, v0
+; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-SDAG-LABEL: freeze_v4i1_vcc:
+; GFX7-SDAG: ; %bb.0:
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
+; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
+; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
+; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
+; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v4, 2, v4
+; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v1, v0
+; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v1, 3, v5
+; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 15, v0
+; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GISEL-LABEL: freeze_v4i1_vcc:
+; GFX7-GISEL: ; %bb.0:
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
+; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
+; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; GFX7-GISEL-NEXT: v_and_b32_e32 v5, 1, v5
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v4, 2, v4
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 3, v5
+; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 15, v0
+; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: freeze_v4i1_vcc:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
+; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
+; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX8-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
+; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1
+; GFX8-GISEL-NEXT: v_and_b32_e32 v5, 1, v5
+; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v4, 2, v4
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 3, v5
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 15, v0
+; GFX8-GISEL-NEXT: flat_store_byte v[2:3], v0
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: freeze_v4i1_vcc:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
+; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX9-GISEL-NEXT: v_and_b32_e32 v4, 1, v4
+; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1
+; GFX9-GISEL-NEXT: v_and_b32_e32 v5, 1, v5
+; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v4, 2, v4
+; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 3, v5
+; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 15, v0
+; GFX9-GISEL-NEXT: global_store_byte v[2:3], v0, off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-SDAG-LABEL: freeze_v4i1_vcc:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4626,3 +14416,5 @@ define void @freeze_v4i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
store <4 x i1> %freeze, ptr addrspace(1) %ptrb
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX8-SDAG: {{.*}}
More information about the llvm-commits
mailing list