[llvm] [LoadStoreVectorizer] Allow redundant stores (PR #169946)
Gang Chen via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 28 11:54:26 PST 2025
https://github.com/cmc-rep updated https://github.com/llvm/llvm-project/pull/169946
>From 75954833f636a61c841d37736bc5620c81225a88 Mon Sep 17 00:00:00 2001
From: Gang Chen <Gang.Chen at amd.com>
Date: Fri, 28 Nov 2025 09:49:11 -0800
Subject: [PATCH 1/2] [LoadStoreVectorizer] Allow redundant stores
---
.../Vectorize/LoadStoreVectorizer.cpp | 68 +++--------
.../AMDGPU/multiple_tails.ll | 4 -
.../AMDGPU/vectorize-redund-stores.ll | 108 ++++++++++++++++++
3 files changed, 124 insertions(+), 56 deletions(-)
create mode 100644 llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vectorize-redund-stores.ll
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index c28314f6ab124..d48c00688e878 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -316,14 +316,12 @@ class Vectorizer {
/// !IsLoad) to ChainBegin -- i.e. there are no intervening may-alias
/// instructions.
///
- /// The map ChainElemOffsets must contain all of the elements in
- /// [ChainBegin, ChainElem] and their offsets from some arbitrary base
- /// address. It's ok if it contains additional entries.
+ /// The map ChainSet must contain all of the elements in
+ /// [ChainBegin, ChainElem]. It's ok if it contains additional entries.
template <bool IsLoadChain>
- bool isSafeToMove(
- Instruction *ChainElem, Instruction *ChainBegin,
- const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
- BatchAAResults &BatchAA);
+ bool isSafeToMove(Instruction *ChainElem, Instruction *ChainBegin,
+ const DenseSet<Instruction *> &ChainSet,
+ BatchAAResults &BatchAA);
/// Merges the equivalence classes if they have underlying objects that differ
/// by one level of indirection (i.e., one is a getelementptr and the other is
@@ -540,9 +538,9 @@ std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &C) {
// We know that elements in the chain with nonverlapping offsets can't
// alias, but AA may not be smart enough to figure this out. Use a
// hashtable.
- DenseMap<Instruction *, APInt /*OffsetFromLeader*/> ChainOffsets;
+ DenseSet<Instruction *> ChainSet;
for (const auto &E : C)
- ChainOffsets.insert({&*E.Inst, E.OffsetFromLeader});
+ ChainSet.insert(E.Inst);
// Across a single invocation of this function the IR is not changing, so
// using a batched Alias Analysis is safe and can reduce compile time.
@@ -573,8 +571,8 @@ std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &C) {
SmallVector<ChainElem, 1> NewChain;
NewChain.emplace_back(*ChainBegin);
for (auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd; ++ChainIt) {
- if (isSafeToMove<IsLoad>(ChainIt->Inst, NewChain.front().Inst,
- ChainOffsets, BatchAA)) {
+ if (isSafeToMove<IsLoad>(ChainIt->Inst, NewChain.front().Inst, ChainSet,
+ BatchAA)) {
LLVM_DEBUG(dbgs() << "LSV: No intervening may-alias instrs; can merge "
<< *ChainIt->Inst << " into " << *ChainBegin->Inst
<< "\n");
@@ -1037,10 +1035,9 @@ bool Vectorizer::vectorizeChain(Chain &C) {
}
template <bool IsLoadChain>
-bool Vectorizer::isSafeToMove(
- Instruction *ChainElem, Instruction *ChainBegin,
- const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
- BatchAAResults &BatchAA) {
+bool Vectorizer::isSafeToMove(Instruction *ChainElem, Instruction *ChainBegin,
+ const DenseSet<Instruction *> &ChainSet,
+ BatchAAResults &BatchAA) {
LLVM_DEBUG(dbgs() << "LSV: isSafeToMove(" << *ChainElem << " -> "
<< *ChainBegin << ")\n");
@@ -1066,10 +1063,6 @@ bool Vectorizer::isSafeToMove(
return BasicBlock::iterator(ChainBegin);
}());
- const APInt &ChainElemOffset = ChainOffsets.at(ChainElem);
- const unsigned ChainElemSize =
- DL.getTypeStoreSize(getLoadStoreType(ChainElem));
-
for (; BBIt != BBItEnd; ++BBIt) {
Instruction *I = &*BBIt;
@@ -1084,39 +1077,10 @@ bool Vectorizer::isSafeToMove(
if (!IsLoadChain && isInvariantLoad(I))
continue;
- // If I is in the chain, we can tell whether it aliases ChainIt by checking
- // what offset ChainIt accesses. This may be better than AA is able to do.
- //
- // We should really only have duplicate offsets for stores (the duplicate
- // loads should be CSE'ed), but in case we have a duplicate load, we'll
- // split the chain so we don't have to handle this case specially.
- if (auto OffsetIt = ChainOffsets.find(I); OffsetIt != ChainOffsets.end()) {
- // I and ChainElem overlap if:
- // - I and ChainElem have the same offset, OR
- // - I's offset is less than ChainElem's, but I touches past the
- // beginning of ChainElem, OR
- // - ChainElem's offset is less than I's, but ChainElem touches past the
- // beginning of I.
- const APInt &IOffset = OffsetIt->second;
- unsigned IElemSize = DL.getTypeStoreSize(getLoadStoreType(I));
- if (IOffset == ChainElemOffset ||
- (IOffset.sle(ChainElemOffset) &&
- (IOffset + IElemSize).sgt(ChainElemOffset)) ||
- (ChainElemOffset.sle(IOffset) &&
- (ChainElemOffset + ChainElemSize).sgt(OffsetIt->second))) {
- LLVM_DEBUG({
- // Double check that AA also sees this alias. If not, we probably
- // have a bug.
- ModRefInfo MR =
- BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem));
- assert(IsLoadChain ? isModSet(MR) : isModOrRefSet(MR));
- dbgs() << "LSV: Found alias in chain: " << *I << "\n";
- });
- return false; // We found an aliasing instruction; bail.
- }
-
- continue; // We're confident there's no alias.
- }
+ // Allow on-chain aliasing because write-order is preserved when stores are
+ // vectorized.
+ if (ChainSet.count(I))
+ continue;
LLVM_DEBUG(dbgs() << "LSV: Querying AA for " << *I << "\n");
ModRefInfo MR = BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem));
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll
index 57da5976b3cfa..6f3c2fc5f387e 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll
@@ -10,11 +10,7 @@ define amdgpu_kernel void @no_crash(i32 %arg) {
; GCN-SAME: i32 [[ARG:%.*]]) {
; GCN-NEXT: [[TEMP2:%.*]] = add i32 [[ARG]], 14
; GCN-NEXT: [[TEMP3:%.*]] = getelementptr [16384 x i32], ptr addrspace(3) @[[GLOB0:[0-9]+]], i32 0, i32 [[TEMP2]]
-; GCN-NEXT: [[TEMP4:%.*]] = add i32 [[ARG]], 15
-; GCN-NEXT: [[TEMP5:%.*]] = getelementptr [16384 x i32], ptr addrspace(3) @[[GLOB0]], i32 0, i32 [[TEMP4]]
; GCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(3) [[TEMP3]], align 4
-; GCN-NEXT: store i32 0, ptr addrspace(3) [[TEMP5]], align 4
-; GCN-NEXT: store i32 0, ptr addrspace(3) [[TEMP5]], align 4
; GCN-NEXT: ret void
;
%temp2 = add i32 %arg, 14
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vectorize-redund-stores.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vectorize-redund-stores.ll
new file mode 100644
index 0000000000000..cd3e3bded681a
--- /dev/null
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vectorize-redund-stores.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck %s
+
+define void @onevec(ptr %ptr, <1 x i32> %sd0, i32 %sd1, i32 %sd2, <1 x i32> %sd3, <1 x i32> %sd4, <1 x i32> %sd5) {
+; CHECK-LABEL: define void @onevec(
+; CHECK-SAME: ptr [[PTR:%.*]], <1 x i32> [[SD0:%.*]], i32 [[SD1:%.*]], i32 [[SD2:%.*]], <1 x i32> [[SD3:%.*]], <1 x i32> [[SD4:%.*]], <1 x i32> [[SD5:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i32> [[SD0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <1 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <1 x i32> [[TMP2]], i32 [[SD1]], i32 0
+; CHECK-NEXT: store <1 x i32> [[TMP3]], ptr [[PTR]], align 4
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i32 16
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <1 x i32> poison, i32 [[SD2]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i32> [[SD3]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <1 x i32> [[TMP4]], i32 [[TMP5]], i32 0
+; CHECK-NEXT: store <1 x i32> [[TMP6]], ptr [[GEP1]], align 4
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i32 32
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i32> [[SD4]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <1 x i32> [[SD5]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <1 x i32> [[TMP8]], i32 [[TMP9]], i32 0
+; CHECK-NEXT: store <1 x i32> [[TMP10]], ptr [[GEP2]], align 4
+; CHECK-NEXT: ret void
+;
+ store <1 x i32> %sd0, ptr %ptr, align 4
+ store i32 %sd1, ptr %ptr, align 4
+
+ %gep1 = getelementptr inbounds i8, ptr %ptr, i32 16
+ store i32 %sd2, ptr %gep1, align 4
+ store <1 x i32> %sd3, ptr %gep1, align 4
+
+ %gep2 = getelementptr inbounds i8, ptr %ptr, i32 32
+ store <1 x i32> %sd4, ptr %gep2, align 4
+ store <1 x i32> %sd5, ptr %gep2, align 4
+ ret void
+}
+
+define void @test(ptr %ptr, i32 %sd0, <2 x i32> %sd1, <2 x i32> %sd2, i32 %sd3) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[PTR:%.*]], i32 [[SD0:%.*]], <2 x i32> [[SD1:%.*]], <2 x i32> [[SD2:%.*]], i32 [[SD3:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SD0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[SD1]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[SD1]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[TMP4]], i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[SD2]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP6]], i32 2
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[SD2]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP8]], i32 3
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[SD3]], i32 2
+; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+ store i32 %sd0, ptr %ptr, align 4
+ %gep1 = getelementptr inbounds i8, ptr %ptr, i32 4
+ store <2 x i32> %sd1, ptr %gep1, align 4
+ %gep2 = getelementptr inbounds i8, ptr %ptr, i32 8
+ store <2 x i32> %sd2, ptr %gep2, align 4
+ %gep3 = getelementptr inbounds i8, ptr %ptr, i32 8
+ store i32 %sd3, ptr %gep3, align 4
+ ret void
+}
+
+define void @vect_zext_bitcast_i8_st4_to_i32_idx(ptr addrspace(1) %arg1, i32 %base, i32 %sd1, i32 %sd2, i32 %sd25, i32 %sd3, i32 %sd4) {
+; CHECK-LABEL: define void @vect_zext_bitcast_i8_st4_to_i32_idx(
+; CHECK-SAME: ptr addrspace(1) [[ARG1:%.*]], i32 [[BASE:%.*]], i32 [[SD1:%.*]], i32 [[SD2:%.*]], i32 [[SD25:%.*]], i32 [[SD3:%.*]], i32 [[SD4:%.*]]) {
+; CHECK-NEXT: [[ADD1:%.*]] = add nuw i32 [[BASE]], 0
+; CHECK-NEXT: [[ZEXT1:%.*]] = zext i32 [[ADD1]] to i64
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 [[ZEXT1]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[SD1]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SD2]], i32 1
+; CHECK-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(1) [[GEP1]], align 4
+; CHECK-NEXT: [[ADD25:%.*]] = add nuw i32 [[BASE]], 6
+; CHECK-NEXT: [[ZEXT25:%.*]] = zext i32 [[ADD25]] to i64
+; CHECK-NEXT: [[GEP25:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 [[ZEXT25]]
+; CHECK-NEXT: store i32 [[SD25]], ptr addrspace(1) [[GEP25]], align 4
+; CHECK-NEXT: [[ADD3:%.*]] = add nuw i32 [[BASE]], 8
+; CHECK-NEXT: [[ZEXT3:%.*]] = zext i32 [[ADD3]] to i64
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 [[ZEXT3]]
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[SD3]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[SD4]], i32 1
+; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr addrspace(1) [[GEP3]], align 4
+; CHECK-NEXT: ret void
+;
+ %add1 = add nuw i32 %base, 0
+ %zext1 = zext i32 %add1 to i64
+ %gep1 = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 %zext1
+ store i32 %sd1, ptr addrspace(1) %gep1, align 4
+ %add2 = add nuw i32 %base, 4
+ %zext2 = zext i32 %add2 to i64
+ %gep2 = getelementptr inbounds i8,ptr addrspace(1) %arg1, i64 %zext2
+ store i32 %sd2, ptr addrspace(1) %gep2, align 4
+
+ ; A store with 2-byte overlap breaks continuity.
+ %add25 = add nuw i32 %base, 6
+ %zext25 = zext i32 %add25 to i64
+ %gep25 = getelementptr inbounds i8,ptr addrspace(1) %arg1, i64 %zext25
+ store i32 %sd25, ptr addrspace(1) %gep25, align 4
+
+ %add3 = add nuw i32 %base, 8
+ %zext3 = zext i32 %add3 to i64
+ %gep3 = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 %zext3
+ store i32 %sd3, ptr addrspace(1) %gep3, align 4
+ %add4 = add nuw i32 %base, 12
+ %zext4 = zext i32 %add4 to i64
+ %gep4 = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 %zext4
+ store i32 %sd4, ptr addrspace(1) %gep4, align 4
+ ret void
+}
>From e1ef1e367d9ac4a66a586d44e11a343594bd319e Mon Sep 17 00:00:00 2001
From: Gang Chen <Gang.Chen at amd.com>
Date: Fri, 28 Nov 2025 11:53:49 -0800
Subject: [PATCH 2/2] [LoadStoreVectorizer] Update more tests
---
.../irtranslator-struct-return-intrinsics.ll | 1 -
.../CodeGen/AMDGPU/function-args-inreg.ll | 7 +-
.../isel-amdgpu-cs-chain-preserve-cc.ll | 380 ++++++++----------
.../AMDGPU/undefined-subreg-liverange.ll | 11 +-
4 files changed, 170 insertions(+), 229 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll
index 01367a5fad447..731d00dcde77d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll
@@ -13,7 +13,6 @@ define amdgpu_ps void @test_div_scale(float %arg0, float %arg1) {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), -1
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[INT1]](s1)
- ; CHECK-NEXT: G_STORE [[INT]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1)
; CHECK-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%call = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %arg0, float %arg1, i1 true)
diff --git a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll
index 831d10480c51c..35bfb6664904a 100644
--- a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll
@@ -1696,8 +1696,6 @@ define void @void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg
; GFX9-LABEL: void_func_i32_v2float_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s16
-; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, s17
; GFX9-NEXT: v_mov_b32_e32 v1, s18
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
@@ -1707,10 +1705,7 @@ define void @void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg
; GFX11-LABEL: void_func_i32_v2float_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v1, s2
-; GFX11-NEXT: v_mov_b32_e32 v0, s1
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i32 %arg0, ptr addrspace(1) poison
diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll
index b1d382040addc..b6326d02fee12 100644
--- a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll
+++ b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll
@@ -371,37 +371,33 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_struct( {ptr,
; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GISEL-GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3
- ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr8
- ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr9
- ; GISEL-GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
- ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr10
- ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY $vgpr12
- ; GISEL-GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY $vgpr13
- ; GISEL-GFX11-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY $vgpr14
- ; GISEL-GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0, [[COPY11]], %subreg.sub1, [[COPY12]], %subreg.sub2, [[COPY13]], %subreg.sub3
+ ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr8
+ ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr9
+ ; GISEL-GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
+ ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr10
+ ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr11
+ ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr12
+ ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY $vgpr13
+ ; GISEL-GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY $vgpr14
+ ; GISEL-GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3
; GISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; GISEL-GFX11-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX11-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[COPY15]], [[COPY14]], 0, 0, implicit $exec :: (store (p0) into `ptr addrspace(1) poison`, addrspace 1)
- ; GISEL-GFX11-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX11-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[DEF]]
+ ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[COPY14]], [[COPY13]], 0, 0, implicit $exec :: (store (p0) into `ptr addrspace(1) poison`, addrspace 1)
+ ; GISEL-GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+ ; GISEL-GFX11-NEXT: [[COPY15:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE3]]
+ ; GISEL-GFX11-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[DEF]]
+ ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX4 [[COPY16]], [[COPY15]], 0, 0, implicit $exec :: (store (<4 x s32>) into `ptr addrspace(1) poison`, align 4, addrspace 1)
; GISEL-GFX11-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY17]], [[COPY16]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1)
- ; GISEL-GFX11-NEXT: [[COPY18:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE1]]
+ ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[COPY17]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (p0) into `ptr addrspace(1) poison`, align 16, addrspace 1)
+ ; GISEL-GFX11-NEXT: [[COPY18:%[0-9]+]]:vreg_64 = COPY [[DEF]]
+ ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY18]], [[COPY8]], 8, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, addrspace 1)
; GISEL-GFX11-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX4 [[COPY19]], [[COPY18]], 0, 0, implicit $exec :: (store (<4 x s32>) into `ptr addrspace(1) poison`, addrspace 1)
- ; GISEL-GFX11-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[COPY20]], [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (store (p0) into `ptr addrspace(1) poison`, align 16, addrspace 1)
- ; GISEL-GFX11-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY21]], [[COPY9]], 8, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, addrspace 1)
- ; GISEL-GFX11-NEXT: [[COPY22:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX4 [[COPY22]], [[REG_SEQUENCE3]], 16, 0, implicit $exec :: (store (<4 x s32>) into `ptr addrspace(1) poison` + 16, addrspace 1)
+ ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX4 [[COPY19]], [[REG_SEQUENCE2]], 16, 0, implicit $exec :: (store (<4 x s32>) into `ptr addrspace(1) poison` + 16, addrspace 1)
; GISEL-GFX11-NEXT: S_ENDPGM 0
;
; GISEL-GFX10-LABEL: name: amdgpu_cs_chain_preserve_cc_struct
@@ -411,42 +407,38 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_struct( {ptr,
; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GISEL-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3
- ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr8
- ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr9
- ; GISEL-GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
- ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr10
- ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY $vgpr12
- ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY $vgpr13
- ; GISEL-GFX10-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY $vgpr14
- ; GISEL-GFX10-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0, [[COPY11]], %subreg.sub1, [[COPY12]], %subreg.sub2, [[COPY13]], %subreg.sub3
+ ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr8
+ ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr9
+ ; GISEL-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
+ ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr10
+ ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr11
+ ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr12
+ ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY $vgpr13
+ ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY $vgpr14
+ ; GISEL-GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3
; GISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; GISEL-GFX10-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX10-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY15]], [[COPY14]], 0, 0, implicit $exec :: (store (p0) into `ptr addrspace(1) poison`, addrspace 1)
- ; GISEL-GFX10-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX10-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[DEF]]
+ ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY14]], [[COPY13]], 0, 0, implicit $exec :: (store (p0) into `ptr addrspace(1) poison`, addrspace 1)
+ ; GISEL-GFX10-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
+ ; GISEL-GFX10-NEXT: [[COPY15:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE3]]
+ ; GISEL-GFX10-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[DEF]]
+ ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY16]], [[COPY15]], 0, 0, implicit $exec :: (store (<4 x s32>) into `ptr addrspace(1) poison`, align 4, addrspace 1)
; GISEL-GFX10-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY17]], [[COPY16]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1)
- ; GISEL-GFX10-NEXT: [[COPY18:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE1]]
+ ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY17]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (p0) into `ptr addrspace(1) poison`, align 16, addrspace 1)
+ ; GISEL-GFX10-NEXT: [[COPY18:%[0-9]+]]:vreg_64 = COPY [[DEF]]
+ ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY18]], [[COPY8]], 8, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, addrspace 1)
; GISEL-GFX10-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY19]], [[COPY18]], 0, 0, implicit $exec :: (store (<4 x s32>) into `ptr addrspace(1) poison`, addrspace 1)
- ; GISEL-GFX10-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY20]], [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (store (p0) into `ptr addrspace(1) poison`, align 16, addrspace 1)
- ; GISEL-GFX10-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY21]], [[COPY9]], 8, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, addrspace 1)
- ; GISEL-GFX10-NEXT: [[COPY22:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY22]], [[REG_SEQUENCE3]], 16, 0, implicit $exec :: (store (<4 x s32>) into `ptr addrspace(1) poison` + 16, addrspace 1)
+ ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY19]], [[REG_SEQUENCE2]], 16, 0, implicit $exec :: (store (<4 x s32>) into `ptr addrspace(1) poison` + 16, addrspace 1)
; GISEL-GFX10-NEXT: S_ENDPGM 0
;
; DAGISEL-GFX11-WF32-LABEL: name: amdgpu_cs_chain_preserve_cc_struct
; DAGISEL-GFX11-WF32: bb.0 (%ir-block.0):
- ; DAGISEL-GFX11-WF32-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14
+ ; DAGISEL-GFX11-WF32-NEXT: liveins: $sgpr0, $sgpr1, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14
; DAGISEL-GFX11-WF32-NEXT: {{ $}}
; DAGISEL-GFX11-WF32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr14
; DAGISEL-GFX11-WF32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr13
@@ -459,44 +451,39 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_struct( {ptr,
; DAGISEL-GFX11-WF32-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr5
; DAGISEL-GFX11-WF32-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr4
; DAGISEL-GFX11-WF32-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; DAGISEL-GFX11-WF32-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; DAGISEL-GFX11-WF32-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; DAGISEL-GFX11-WF32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
- ; DAGISEL-GFX11-WF32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY14]], %subreg.sub0, [[COPY15]], %subreg.sub1, [[COPY16]], %subreg.sub2, [[COPY17]], %subreg.sub3
+ ; DAGISEL-GFX11-WF32-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
+ ; DAGISEL-GFX11-WF32-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
+ ; DAGISEL-GFX11-WF32-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
+ ; DAGISEL-GFX11-WF32-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
+ ; DAGISEL-GFX11-WF32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY13]], %subreg.sub0, [[COPY14]], %subreg.sub1, [[COPY15]], %subreg.sub2, [[COPY16]], %subreg.sub3
; DAGISEL-GFX11-WF32-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
- ; DAGISEL-GFX11-WF32-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY18]], %subreg.sub0, [[COPY19]], %subreg.sub1
+ ; DAGISEL-GFX11-WF32-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
+ ; DAGISEL-GFX11-WF32-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
+ ; DAGISEL-GFX11-WF32-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY17]], %subreg.sub0, [[COPY18]], %subreg.sub1
; DAGISEL-GFX11-WF32-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY20]], killed [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1)
+ ; DAGISEL-GFX11-WF32-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[DEF]]
+ ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY19]], killed [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1)
; DAGISEL-GFX11-WF32-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF1]]
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
- ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORD [[COPY21]], [[COPY22]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1)
+ ; DAGISEL-GFX11-WF32-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF1]]
+ ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX4 [[COPY20]], killed [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 4, addrspace 1)
; DAGISEL-GFX11-WF32-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY23:%[0-9]+]]:vreg_64 = COPY [[DEF2]]
- ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX4 [[COPY23]], killed [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, addrspace 1)
+ ; DAGISEL-GFX11-WF32-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF2]]
+ ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX4 [[COPY21]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison` + 16, addrspace 1)
; DAGISEL-GFX11-WF32-NEXT: [[DEF3:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY24:%[0-9]+]]:vreg_64 = COPY [[DEF3]]
- ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX4 [[COPY24]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison` + 16, addrspace 1)
+ ; DAGISEL-GFX11-WF32-NEXT: [[COPY22:%[0-9]+]]:vreg_64 = COPY [[DEF3]]
+ ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORD [[COPY22]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, basealign 16, addrspace 1)
; DAGISEL-GFX11-WF32-NEXT: [[DEF4:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY25:%[0-9]+]]:vreg_64 = COPY [[DEF4]]
- ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORD [[COPY25]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, basealign 16, addrspace 1)
- ; DAGISEL-GFX11-WF32-NEXT: [[DEF5:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY26:%[0-9]+]]:vreg_64 = COPY [[DEF5]]
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY27:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]]
- ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY26]], killed [[COPY27]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 16, addrspace 1)
+ ; DAGISEL-GFX11-WF32-NEXT: [[COPY23:%[0-9]+]]:vreg_64 = COPY [[DEF4]]
+ ; DAGISEL-GFX11-WF32-NEXT: [[COPY24:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]]
+ ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY23]], killed [[COPY24]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 16, addrspace 1)
; DAGISEL-GFX11-WF32-NEXT: S_ENDPGM 0
;
; DAGISEL-GFX11-WF64-LABEL: name: amdgpu_cs_chain_preserve_cc_struct
; DAGISEL-GFX11-WF64: bb.0 (%ir-block.0):
- ; DAGISEL-GFX11-WF64-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14
+ ; DAGISEL-GFX11-WF64-NEXT: liveins: $sgpr0, $sgpr1, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14
; DAGISEL-GFX11-WF64-NEXT: {{ $}}
; DAGISEL-GFX11-WF64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr14
; DAGISEL-GFX11-WF64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr13
@@ -509,44 +496,39 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_struct( {ptr,
; DAGISEL-GFX11-WF64-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr5
; DAGISEL-GFX11-WF64-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr4
; DAGISEL-GFX11-WF64-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; DAGISEL-GFX11-WF64-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; DAGISEL-GFX11-WF64-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; DAGISEL-GFX11-WF64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
- ; DAGISEL-GFX11-WF64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY14]], %subreg.sub0, [[COPY15]], %subreg.sub1, [[COPY16]], %subreg.sub2, [[COPY17]], %subreg.sub3
+ ; DAGISEL-GFX11-WF64-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
+ ; DAGISEL-GFX11-WF64-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
+ ; DAGISEL-GFX11-WF64-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
+ ; DAGISEL-GFX11-WF64-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
+ ; DAGISEL-GFX11-WF64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY13]], %subreg.sub0, [[COPY14]], %subreg.sub1, [[COPY15]], %subreg.sub2, [[COPY16]], %subreg.sub3
; DAGISEL-GFX11-WF64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
- ; DAGISEL-GFX11-WF64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY18]], %subreg.sub0, [[COPY19]], %subreg.sub1
+ ; DAGISEL-GFX11-WF64-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
+ ; DAGISEL-GFX11-WF64-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
+ ; DAGISEL-GFX11-WF64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY17]], %subreg.sub0, [[COPY18]], %subreg.sub1
; DAGISEL-GFX11-WF64-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY20]], killed [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1)
+ ; DAGISEL-GFX11-WF64-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[DEF]]
+ ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY19]], killed [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1)
; DAGISEL-GFX11-WF64-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF1]]
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
- ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORD [[COPY21]], [[COPY22]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1)
+ ; DAGISEL-GFX11-WF64-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF1]]
+ ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX4 [[COPY20]], killed [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 4, addrspace 1)
; DAGISEL-GFX11-WF64-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY23:%[0-9]+]]:vreg_64 = COPY [[DEF2]]
- ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX4 [[COPY23]], killed [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, addrspace 1)
+ ; DAGISEL-GFX11-WF64-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF2]]
+ ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX4 [[COPY21]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison` + 16, addrspace 1)
; DAGISEL-GFX11-WF64-NEXT: [[DEF3:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY24:%[0-9]+]]:vreg_64 = COPY [[DEF3]]
- ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX4 [[COPY24]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison` + 16, addrspace 1)
+ ; DAGISEL-GFX11-WF64-NEXT: [[COPY22:%[0-9]+]]:vreg_64 = COPY [[DEF3]]
+ ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORD [[COPY22]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, basealign 16, addrspace 1)
; DAGISEL-GFX11-WF64-NEXT: [[DEF4:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY25:%[0-9]+]]:vreg_64 = COPY [[DEF4]]
- ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORD [[COPY25]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, basealign 16, addrspace 1)
- ; DAGISEL-GFX11-WF64-NEXT: [[DEF5:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY26:%[0-9]+]]:vreg_64 = COPY [[DEF5]]
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY27:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]]
- ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY26]], killed [[COPY27]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 16, addrspace 1)
+ ; DAGISEL-GFX11-WF64-NEXT: [[COPY23:%[0-9]+]]:vreg_64 = COPY [[DEF4]]
+ ; DAGISEL-GFX11-WF64-NEXT: [[COPY24:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]]
+ ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY23]], killed [[COPY24]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 16, addrspace 1)
; DAGISEL-GFX11-WF64-NEXT: S_ENDPGM 0
;
; DAGISEL-GFX10-WF32-LABEL: name: amdgpu_cs_chain_preserve_cc_struct
; DAGISEL-GFX10-WF32: bb.0 (%ir-block.0):
- ; DAGISEL-GFX10-WF32-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14
+ ; DAGISEL-GFX10-WF32-NEXT: liveins: $sgpr0, $sgpr1, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14
; DAGISEL-GFX10-WF32-NEXT: {{ $}}
; DAGISEL-GFX10-WF32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr14
; DAGISEL-GFX10-WF32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr13
@@ -559,44 +541,39 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_struct( {ptr,
; DAGISEL-GFX10-WF32-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr5
; DAGISEL-GFX10-WF32-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr4
; DAGISEL-GFX10-WF32-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; DAGISEL-GFX10-WF32-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; DAGISEL-GFX10-WF32-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; DAGISEL-GFX10-WF32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
- ; DAGISEL-GFX10-WF32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY14]], %subreg.sub0, [[COPY15]], %subreg.sub1, [[COPY16]], %subreg.sub2, [[COPY17]], %subreg.sub3
+ ; DAGISEL-GFX10-WF32-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
+ ; DAGISEL-GFX10-WF32-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
+ ; DAGISEL-GFX10-WF32-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
+ ; DAGISEL-GFX10-WF32-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
+ ; DAGISEL-GFX10-WF32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY13]], %subreg.sub0, [[COPY14]], %subreg.sub1, [[COPY15]], %subreg.sub2, [[COPY16]], %subreg.sub3
; DAGISEL-GFX10-WF32-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
- ; DAGISEL-GFX10-WF32-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY18]], %subreg.sub0, [[COPY19]], %subreg.sub1
+ ; DAGISEL-GFX10-WF32-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
+ ; DAGISEL-GFX10-WF32-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
+ ; DAGISEL-GFX10-WF32-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY17]], %subreg.sub0, [[COPY18]], %subreg.sub1
; DAGISEL-GFX10-WF32-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY20]], killed [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1)
+ ; DAGISEL-GFX10-WF32-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[DEF]]
+ ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY19]], killed [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1)
; DAGISEL-GFX10-WF32-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF1]]
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
- ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORD [[COPY21]], [[COPY22]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1)
+ ; DAGISEL-GFX10-WF32-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF1]]
+ ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX4 [[COPY20]], killed [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 4, addrspace 1)
; DAGISEL-GFX10-WF32-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY23:%[0-9]+]]:vreg_64 = COPY [[DEF2]]
- ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX4 [[COPY23]], killed [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, addrspace 1)
+ ; DAGISEL-GFX10-WF32-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF2]]
+ ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX4 [[COPY21]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison` + 16, addrspace 1)
; DAGISEL-GFX10-WF32-NEXT: [[DEF3:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY24:%[0-9]+]]:vreg_64 = COPY [[DEF3]]
- ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX4 [[COPY24]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison` + 16, addrspace 1)
+ ; DAGISEL-GFX10-WF32-NEXT: [[COPY22:%[0-9]+]]:vreg_64 = COPY [[DEF3]]
+ ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORD [[COPY22]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, basealign 16, addrspace 1)
; DAGISEL-GFX10-WF32-NEXT: [[DEF4:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY25:%[0-9]+]]:vreg_64 = COPY [[DEF4]]
- ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORD [[COPY25]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, basealign 16, addrspace 1)
- ; DAGISEL-GFX10-WF32-NEXT: [[DEF5:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY26:%[0-9]+]]:vreg_64 = COPY [[DEF5]]
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY27:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]]
- ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY26]], killed [[COPY27]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 16, addrspace 1)
+ ; DAGISEL-GFX10-WF32-NEXT: [[COPY23:%[0-9]+]]:vreg_64 = COPY [[DEF4]]
+ ; DAGISEL-GFX10-WF32-NEXT: [[COPY24:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]]
+ ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY23]], killed [[COPY24]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 16, addrspace 1)
; DAGISEL-GFX10-WF32-NEXT: S_ENDPGM 0
;
; DAGISEL-GFX10-WF64-LABEL: name: amdgpu_cs_chain_preserve_cc_struct
; DAGISEL-GFX10-WF64: bb.0 (%ir-block.0):
- ; DAGISEL-GFX10-WF64-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14
+ ; DAGISEL-GFX10-WF64-NEXT: liveins: $sgpr0, $sgpr1, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14
; DAGISEL-GFX10-WF64-NEXT: {{ $}}
; DAGISEL-GFX10-WF64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr14
; DAGISEL-GFX10-WF64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr13
@@ -609,39 +586,34 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_struct( {ptr,
; DAGISEL-GFX10-WF64-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr5
; DAGISEL-GFX10-WF64-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr4
; DAGISEL-GFX10-WF64-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; DAGISEL-GFX10-WF64-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; DAGISEL-GFX10-WF64-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; DAGISEL-GFX10-WF64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
- ; DAGISEL-GFX10-WF64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY14]], %subreg.sub0, [[COPY15]], %subreg.sub1, [[COPY16]], %subreg.sub2, [[COPY17]], %subreg.sub3
+ ; DAGISEL-GFX10-WF64-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
+ ; DAGISEL-GFX10-WF64-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
+ ; DAGISEL-GFX10-WF64-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
+ ; DAGISEL-GFX10-WF64-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
+ ; DAGISEL-GFX10-WF64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY13]], %subreg.sub0, [[COPY14]], %subreg.sub1, [[COPY15]], %subreg.sub2, [[COPY16]], %subreg.sub3
; DAGISEL-GFX10-WF64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
- ; DAGISEL-GFX10-WF64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY18]], %subreg.sub0, [[COPY19]], %subreg.sub1
+ ; DAGISEL-GFX10-WF64-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
+ ; DAGISEL-GFX10-WF64-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
+ ; DAGISEL-GFX10-WF64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY17]], %subreg.sub0, [[COPY18]], %subreg.sub1
; DAGISEL-GFX10-WF64-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY20]], killed [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1)
+ ; DAGISEL-GFX10-WF64-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[DEF]]
+ ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY19]], killed [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1)
; DAGISEL-GFX10-WF64-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF1]]
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
- ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORD [[COPY21]], [[COPY22]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1)
+ ; DAGISEL-GFX10-WF64-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF1]]
+ ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX4 [[COPY20]], killed [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 4, addrspace 1)
; DAGISEL-GFX10-WF64-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY23:%[0-9]+]]:vreg_64 = COPY [[DEF2]]
- ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX4 [[COPY23]], killed [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, addrspace 1)
+ ; DAGISEL-GFX10-WF64-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF2]]
+ ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX4 [[COPY21]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison` + 16, addrspace 1)
; DAGISEL-GFX10-WF64-NEXT: [[DEF3:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY24:%[0-9]+]]:vreg_64 = COPY [[DEF3]]
- ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX4 [[COPY24]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison` + 16, addrspace 1)
+ ; DAGISEL-GFX10-WF64-NEXT: [[COPY22:%[0-9]+]]:vreg_64 = COPY [[DEF3]]
+ ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORD [[COPY22]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, basealign 16, addrspace 1)
; DAGISEL-GFX10-WF64-NEXT: [[DEF4:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY25:%[0-9]+]]:vreg_64 = COPY [[DEF4]]
- ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORD [[COPY25]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, basealign 16, addrspace 1)
- ; DAGISEL-GFX10-WF64-NEXT: [[DEF5:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY26:%[0-9]+]]:vreg_64 = COPY [[DEF5]]
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY27:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]]
- ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY26]], killed [[COPY27]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 16, addrspace 1)
+ ; DAGISEL-GFX10-WF64-NEXT: [[COPY23:%[0-9]+]]:vreg_64 = COPY [[DEF4]]
+ ; DAGISEL-GFX10-WF64-NEXT: [[COPY24:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]]
+ ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY23]], killed [[COPY24]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 16, addrspace 1)
; DAGISEL-GFX10-WF64-NEXT: S_ENDPGM 0
%p = extractvalue {ptr, i32, <4 x i32>} %a, 0
%i = extractvalue {ptr, i32, <4 x i32>} %a, 1
@@ -1360,100 +1332,72 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_many_regs(<36 x i
; GISEL-GFX11: bb.1 (%ir-block.0):
; GISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63, $vgpr64, $vgpr65, $vgpr66, $vgpr67, $vgpr68, $vgpr69, $vgpr70, $vgpr71, $vgpr72, $vgpr73, $vgpr74, $vgpr75, $vgpr76, $vgpr77, $vgpr78, $vgpr79, $vgpr80, $vgpr81, $vgpr82, $vgpr83, $vgpr84, $vgpr85, $vgpr86, $vgpr87, $vgpr88, $vgpr89, $vgpr90, $vgpr91, $vgpr92, $vgpr93, $vgpr94, $vgpr95, $vgpr96, $vgpr97, $vgpr98, $vgpr99, $vgpr100, $vgpr101, $vgpr102, $vgpr103, $vgpr104, $vgpr105, $vgpr106, $vgpr107, $vgpr108, $vgpr109, $vgpr110, $vgpr111, $vgpr112, $vgpr113, $vgpr114, $vgpr115, $vgpr116, $vgpr117, $vgpr118, $vgpr119, $vgpr120, $vgpr121, $vgpr122, $vgpr123, $vgpr124, $vgpr125, $vgpr126, $vgpr127, $vgpr128, $vgpr129, $vgpr130, $vgpr131, $vgpr132, $vgpr133, $vgpr134, $vgpr135
; GISEL-GFX11-NEXT: {{ $}}
- ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr35
- ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8
- ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr135
+ ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8
+ ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr135
; GISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
- ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY4]], [[COPY3]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1)
- ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[COPY5]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (<2 x s32>) into `ptr addrspace(1) poison`, addrspace 1)
+ ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]]
+ ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (<2 x s32>) into `ptr addrspace(1) poison`, align 4, addrspace 1)
; GISEL-GFX11-NEXT: S_ENDPGM 0
;
; GISEL-GFX10-LABEL: name: amdgpu_cs_chain_preserve_many_regs
; GISEL-GFX10: bb.1 (%ir-block.0):
; GISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63, $vgpr64, $vgpr65, $vgpr66, $vgpr67, $vgpr68, $vgpr69, $vgpr70, $vgpr71, $vgpr72, $vgpr73, $vgpr74, $vgpr75, $vgpr76, $vgpr77, $vgpr78, $vgpr79, $vgpr80, $vgpr81, $vgpr82, $vgpr83, $vgpr84, $vgpr85, $vgpr86, $vgpr87, $vgpr88, $vgpr89, $vgpr90, $vgpr91, $vgpr92, $vgpr93, $vgpr94, $vgpr95, $vgpr96, $vgpr97, $vgpr98, $vgpr99, $vgpr100, $vgpr101, $vgpr102, $vgpr103, $vgpr104, $vgpr105, $vgpr106, $vgpr107, $vgpr108, $vgpr109, $vgpr110, $vgpr111, $vgpr112, $vgpr113, $vgpr114, $vgpr115, $vgpr116, $vgpr117, $vgpr118, $vgpr119, $vgpr120, $vgpr121, $vgpr122, $vgpr123, $vgpr124, $vgpr125, $vgpr126, $vgpr127, $vgpr128, $vgpr129, $vgpr130, $vgpr131, $vgpr132, $vgpr133, $vgpr134, $vgpr135
; GISEL-GFX10-NEXT: {{ $}}
- ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr35
- ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8
- ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr135
+ ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8
+ ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr135
; GISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
- ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY4]], [[COPY3]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1)
- ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY5]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (<2 x s32>) into `ptr addrspace(1) poison`, addrspace 1)
+ ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]]
+ ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (<2 x s32>) into `ptr addrspace(1) poison`, align 4, addrspace 1)
; GISEL-GFX10-NEXT: S_ENDPGM 0
;
; DAGISEL-GFX11-WF32-LABEL: name: amdgpu_cs_chain_preserve_many_regs
; DAGISEL-GFX11-WF32: bb.0 (%ir-block.0):
- ; DAGISEL-GFX11-WF32-NEXT: liveins: $sgpr35, $vgpr8, $vgpr135
+ ; DAGISEL-GFX11-WF32-NEXT: liveins: $vgpr8, $vgpr135
; DAGISEL-GFX11-WF32-NEXT: {{ $}}
; DAGISEL-GFX11-WF32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr135
; DAGISEL-GFX11-WF32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr35
- ; DAGISEL-GFX11-WF32-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
- ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORD [[COPY3]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1)
; DAGISEL-GFX11-WF32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
- ; DAGISEL-GFX11-WF32-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX11-WF32-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[DEF1]]
- ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY5]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1)
+ ; DAGISEL-GFX11-WF32-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; DAGISEL-GFX11-WF32-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]]
+ ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX2 killed [[COPY2]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 4, addrspace 1)
; DAGISEL-GFX11-WF32-NEXT: S_ENDPGM 0
;
; DAGISEL-GFX11-WF64-LABEL: name: amdgpu_cs_chain_preserve_many_regs
; DAGISEL-GFX11-WF64: bb.0 (%ir-block.0):
- ; DAGISEL-GFX11-WF64-NEXT: liveins: $sgpr35, $vgpr8, $vgpr135
+ ; DAGISEL-GFX11-WF64-NEXT: liveins: $vgpr8, $vgpr135
; DAGISEL-GFX11-WF64-NEXT: {{ $}}
; DAGISEL-GFX11-WF64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr135
; DAGISEL-GFX11-WF64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr35
- ; DAGISEL-GFX11-WF64-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
- ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORD [[COPY3]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1)
; DAGISEL-GFX11-WF64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
- ; DAGISEL-GFX11-WF64-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX11-WF64-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[DEF1]]
- ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY5]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1)
+ ; DAGISEL-GFX11-WF64-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; DAGISEL-GFX11-WF64-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]]
+ ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX2 killed [[COPY2]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 4, addrspace 1)
; DAGISEL-GFX11-WF64-NEXT: S_ENDPGM 0
;
; DAGISEL-GFX10-WF32-LABEL: name: amdgpu_cs_chain_preserve_many_regs
; DAGISEL-GFX10-WF32: bb.0 (%ir-block.0):
- ; DAGISEL-GFX10-WF32-NEXT: liveins: $sgpr35, $vgpr8, $vgpr135
+ ; DAGISEL-GFX10-WF32-NEXT: liveins: $vgpr8, $vgpr135
; DAGISEL-GFX10-WF32-NEXT: {{ $}}
; DAGISEL-GFX10-WF32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr135
; DAGISEL-GFX10-WF32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr35
- ; DAGISEL-GFX10-WF32-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
- ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORD [[COPY3]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1)
; DAGISEL-GFX10-WF32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
- ; DAGISEL-GFX10-WF32-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX10-WF32-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[DEF1]]
- ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY5]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1)
+ ; DAGISEL-GFX10-WF32-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; DAGISEL-GFX10-WF32-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]]
+ ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX2 killed [[COPY2]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 4, addrspace 1)
; DAGISEL-GFX10-WF32-NEXT: S_ENDPGM 0
;
; DAGISEL-GFX10-WF64-LABEL: name: amdgpu_cs_chain_preserve_many_regs
; DAGISEL-GFX10-WF64: bb.0 (%ir-block.0):
- ; DAGISEL-GFX10-WF64-NEXT: liveins: $sgpr35, $vgpr8, $vgpr135
+ ; DAGISEL-GFX10-WF64-NEXT: liveins: $vgpr8, $vgpr135
; DAGISEL-GFX10-WF64-NEXT: {{ $}}
; DAGISEL-GFX10-WF64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr135
; DAGISEL-GFX10-WF64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr35
- ; DAGISEL-GFX10-WF64-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
- ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORD [[COPY3]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1)
; DAGISEL-GFX10-WF64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
- ; DAGISEL-GFX10-WF64-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; DAGISEL-GFX10-WF64-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[DEF1]]
- ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY5]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1)
+ ; DAGISEL-GFX10-WF64-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; DAGISEL-GFX10-WF64-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]]
+ ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX2 killed [[COPY2]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 4, addrspace 1)
; DAGISEL-GFX10-WF64-NEXT: S_ENDPGM 0
%c = extractelement <36 x i32> %a, i32 35
store i32 %c, ptr addrspace(1) poison
diff --git a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
index 31708a9b738db..23086fb1c18fa 100644
--- a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
+++ b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
@@ -52,12 +52,15 @@ define amdgpu_ps float @valley_partially_undef_copy() #0 {
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_mov_b32_e32 v2, 0x7fc00000
-; CHECK-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
-; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], 0
+; CHECK-NEXT: s_mov_b32 s0, 0x7fc00000
+; CHECK-NEXT: v_mov_b32_e32 v5, v3
+; CHECK-NEXT: v_mov_b32_e32 v4, v2
+; CHECK-NEXT: v_mov_b32_e32 v3, v1
+; CHECK-NEXT: v_mov_b32_e32 v2, v0
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
-; CHECK-NEXT: s_waitcnt expcnt(1)
; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; CHECK-NEXT: v_mov_b32_e32 v2, s0
+; CHECK-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v1
; CHECK-NEXT: .LBB1_1: ; %bb9
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
More information about the llvm-commits
mailing list