[llvm] 29f88b9 - [GlobalISel] Rework more/fewer elements for vectors
Petar Avramovic via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 23 05:30:45 PST 2021
Author: Petar Avramovic
Date: 2021-12-23T14:30:02+01:00
New Revision: 29f88b93fdbe3e20c35842ca3a6c2a3f1a81cfce
URL: https://github.com/llvm/llvm-project/commit/29f88b93fdbe3e20c35842ca3a6c2a3f1a81cfce
DIFF: https://github.com/llvm/llvm-project/commit/29f88b93fdbe3e20c35842ca3a6c2a3f1a81cfce.diff
LOG: [GlobalISel] Rework more/fewer elements for vectors
The artifact combiner is not able to access individual elements after
LCMTy-style merge/unmerge, extract, and insert are used to change the
number of vector elements (padding with undef or splitting into sub-vector
instructions). Use unmerge to individual elements instead, and then merge
the elements into the requested types.
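
As a sketch of that element-wise strategy (illustrative only, not code from
this patch; B and Reg stand for a MachineIRBuilder and a <5 x s32> input
register assumed to be in scope):

    LLT S32 = LLT::scalar(32);
    LLT V2S32 = LLT::fixed_vector(2, 32);
    // a, b, c, d, e = G_UNMERGE_VALUES %Reg(<5 x s32>)
    auto Unmerge = B.buildUnmerge(S32, Reg);
    SmallVector<Register, 8> Elts;
    for (unsigned I = 0, E = Unmerge->getNumDefs(); I != E; ++I)
      Elts.push_back(Unmerge.getReg(I));
    // %P0:_(<2 x s32>) = G_BUILD_VECTOR a, b
    Register P0 = B.buildMerge(V2S32, {Elts[0], Elts[1]}).getReg(0);
    // %P1:_(<2 x s32>) = G_BUILD_VECTOR c, d
    Register P1 = B.buildMerge(V2S32, {Elts[2], Elts[3]}).getReg(0);
    Register Leftover = Elts[4]; // s32 leftover, directly accessible

Every element is now reachable through a single G_UNMERGE_VALUES, which the
artifact combiner can fold, unlike an LCMTy-sized merge/unmerge pair.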
Change argument lowering for vectors and moreElementsVector to use
buildPadVectorWithUndefElements and buildDeleteTrailingVectorElements.
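
A minimal sketch of the two builders' contracts (their full doc comments are
in the MachineIRBuilder.h hunk below; B and Src are an assumed builder and
an assumed <3 x s16> register):

    LLT V3S16 = LLT::fixed_vector(3, 16);
    LLT V4S16 = LLT::fixed_vector(4, 16);
    // a, b, c = G_UNMERGE_VALUES %Src(<3 x s16>)
    // %Padded:_(<4 x s16>) = G_BUILD_VECTOR a, b, c, undef
    Register Padded = B.buildPadVectorWithUndefElements(V4S16, Src).getReg(0);
    // a, b, c, d = G_UNMERGE_VALUES %Padded(<4 x s16>)
    // %Trimmed:_(<3 x s16>) = G_BUILD_VECTOR a, b, c
    Register Trimmed =
        B.buildDeleteTrailingVectorElements(V3S16, Padded).getReg(0);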
FewerElementsVector had a few helpers with differing behavior; introduce a
new helper that covers most of the opcodes.
The FewerElementsVector helper is more flexible since it can create a
leftover instruction smaller than the requested type (useful in case a
target wants to avoid padding with undef and use fewer registers). If a
target does not want a leftover of a different type, it should perform
moreElements first.
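
Concretely (a sketch reusing the names from the first example above, with
A0, A1, B0, B1 as <2 x s32> pieces of the two sources and A2, B2 as their
s32 leftover elements):

    // Uneven split of %d:_(<5 x s32>) = G_ADD %a, %b with NumElts = 2.
    Register D0 = B.buildAdd(V2S32, A0, B0).getReg(0); // <2 x s32> piece
    Register D1 = B.buildAdd(V2S32, A1, B1).getReg(0); // <2 x s32> piece
    Register D2 = B.buildAdd(S32, A2, B2).getReg(0);   // smaller leftover add
    // mergeMixedSubvectors then unmerges D0 and D1 back to scalars and
    // rebuilds %d with a single G_BUILD_VECTOR of five s32 values.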
Some helpers performed moreElements first in order to split without a
leftover. Opcodes that used such helpers now use clampMaxNumElementsStrict
(which does moreElements first) in LegalizerInfo to avoid test changes.
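
An illustrative ruleset (hypothetical target code, not taken from this
patch) showing the strict variant:

    // Split G_AND on s32 vectors strictly into <2 x s32> pieces. The
    // MoreElements step pads e.g. <5 x s32> to <6 x s32> with undef first,
    // so the FewerElements step never produces an odd leftover.
    getActionDefinitionsBuilder(TargetOpcode::G_AND)
        .legalFor({LLT::fixed_vector(2, 32)})
        .clampMaxNumElementsStrict(0, LLT::scalar(32), 2);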
Fixes failures caused by failing to combine artifacts created during
more/fewer elements vector legalization.
Differential Revision: https://reviews.llvm.org/D114198
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
llvm/include/llvm/CodeGen/GlobalISel/Utils.h
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
llvm/lib/CodeGen/GlobalISel/Utils.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll
llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir
llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
llvm/test/CodeGen/AMDGPU/fp-min-max-atomics.ll
llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll
llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll
llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 74615c73741a2..044f2e22cfdd2 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -192,6 +192,10 @@ class LegalizerHelper {
SmallVectorImpl<Register> &VRegs,
SmallVectorImpl<Register> &LeftoverVRegs);
+ /// Version which handles irregular sub-vector splits.
+ void extractVectorParts(Register Reg, unsigned NumElts,
+ SmallVectorImpl<Register> &VRegs);
+
/// Helper function to build a wide generic register \p DstReg of type \p
/// RegTy from smaller parts. This will produce a G_MERGE_VALUES,
/// G_BUILD_VECTOR, G_CONCAT_VECTORS, or sequence of G_INSERT as appropriate
@@ -205,6 +209,11 @@ class LegalizerHelper {
LLT PartTy, ArrayRef<Register> PartRegs,
LLT LeftoverTy = LLT(), ArrayRef<Register> LeftoverRegs = {});
+ /// Merge \p PartRegs with different types into \p DstReg.
+ void mergeMixedSubvectors(Register DstReg, ArrayRef<Register> PartRegs);
+
+ void appendVectorElts(SmallVectorImpl<Register> &Elts, Register Reg);
+
/// Unmerge \p SrcReg into smaller sized values, and append them to \p
/// Parts. The elements of \p Parts will be the greatest common divisor type
/// of \p DstTy, \p NarrowTy and the type of \p SrcReg. This will compute and
@@ -285,26 +294,18 @@ class LegalizerHelper {
/// vector bounds.
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index);
- LegalizeResult fewerElementsVectorImplicitDef(MachineInstr &MI,
- unsigned TypeIdx, LLT NarrowTy);
-
- /// Legalize a instruction with a vector type where each operand may have a
- /// different element type. All type indexes must have the same number of
- /// elements.
- LegalizeResult fewerElementsVectorMultiEltType(MachineInstr &MI,
- unsigned TypeIdx, LLT NarrowTy);
+ /// Handles most opcodes. Split \p MI into the same instruction on sub-vectors
+ /// or scalars with \p NumElts elements (1 for scalar). Supports uneven splits:
+ /// there can be a leftover sub-vector with fewer than \p NumElts or a leftover
+ /// scalar. To avoid this, use moreElements first and set MI's number of
+ /// elements to a multiple of \p NumElts. Non-vector operands that should be
+ /// used on all sub-instructions without splitting are listed in \p NonVecOpIndices.
+ LegalizeResult fewerElementsVectorMultiEltType(
+ GenericMachineInstr &MI, unsigned NumElts,
+ std::initializer_list<unsigned> NonVecOpIndices = {});
- LegalizeResult fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy);
-
- LegalizeResult
- fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
-
- LegalizeResult
- fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
-
- LegalizeResult fewerElementsVectorPhi(MachineInstr &MI,
- unsigned TypeIdx, LLT NarrowTy);
+ LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI,
+ unsigned NumElts);
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
LLT MoreTy);
@@ -320,22 +321,9 @@ class LegalizerHelper {
unsigned TypeIdx,
LLT NarrowTy);
- LegalizeResult fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy);
-
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx,
LLT NarrowTy);
- /// Legalize an instruction by reducing the operation width, either by
- /// narrowing the type of the operation or by reducing the number of elements
- /// of a vector.
- /// The used strategy (narrow vs. fewerElements) is decided by \p NarrowTy.
- /// Narrow is used if the scalar type of \p NarrowTy and \p DstTy differ,
- /// fewerElements is used when the scalar type is the same but the number of
- /// elements between \p NarrowTy and \p DstTy differ.
- LegalizeResult reduceOperationWidth(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy);
-
LegalizeResult fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy);
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 68c14240ebc7f..0b37539030b17 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -58,7 +58,10 @@ enum LegalizeAction : std::uint8_t {
/// The (vector) operation should be implemented by splitting it into
/// sub-vectors where the operation is legal. For example a <8 x s64> add
- /// might be implemented as 4 separate <2 x s64> adds.
+ /// might be implemented as 4 separate <2 x s64> adds. There can be a leftover
+ /// if there are not enough elements for the last sub-vector, e.g. a <7 x s64>
+ /// add will be implemented as 3 separate <2 x s64> adds and one s64 add. Leftover
+ /// types can be avoided by doing MoreElements first.
FewerElements,
/// The (vector) operation should be implemented by widening the input
@@ -1050,6 +1053,26 @@ class LegalizeRuleSet {
TypeIdx, LLT::fixed_vector(MinElements, VecTy.getElementType()));
});
}
+
+ /// Set the number of elements to the nearest larger multiple of \p NumElts.
+ LegalizeRuleSet &alignNumElementsTo(unsigned TypeIdx, const LLT EltTy,
+ unsigned NumElts) {
+ typeIdx(TypeIdx);
+ return actionIf(
+ LegalizeAction::MoreElements,
+ [=](const LegalityQuery &Query) {
+ LLT VecTy = Query.Types[TypeIdx];
+ return VecTy.isVector() && VecTy.getElementType() == EltTy &&
+ (VecTy.getNumElements() % NumElts != 0);
+ },
+ [=](const LegalityQuery &Query) {
+ LLT VecTy = Query.Types[TypeIdx];
+ unsigned NewSize = alignTo(VecTy.getNumElements(), NumElts);
+ return std::make_pair(
+ TypeIdx, LLT::fixed_vector(NewSize, VecTy.getElementType()));
+ });
+ }
+
/// Limit the number of elements in EltTy vectors to at most MaxElements.
LegalizeRuleSet &clampMaxNumElements(unsigned TypeIdx, const LLT EltTy,
unsigned MaxElements) {
@@ -1085,6 +1108,19 @@ class LegalizeRuleSet {
.clampMaxNumElements(TypeIdx, EltTy, MaxTy.getNumElements());
}
+ /// Express \p EltTy vectors strictly using vectors with \p NumElts elements
+ /// (or scalars when \p NumElts equals 1).
+ /// First pad with undef elements to the nearest larger multiple of \p NumElts.
+ /// Then perform split with all sub-instructions having the same type.
+ /// Using clampMaxNumElements (non-strict) can result in leftover instruction
+ /// with
diff erent type (fewer elements then \p NumElts or scalar).
+ /// No effect if the type is not a vector.
+ LegalizeRuleSet &clampMaxNumElementsStrict(unsigned TypeIdx, const LLT EltTy,
+ unsigned NumElts) {
+ return alignNumElementsTo(TypeIdx, EltTy, NumElts)
+ .clampMaxNumElements(TypeIdx, EltTy, NumElts);
+ }
+
/// Fallback on the previous implementation. This should only be used while
/// porting a rule.
LegalizeRuleSet &fallback() {
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 069f71b543286..fde0cb3cf1af5 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -497,6 +497,34 @@ class MachineIRBuilder {
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0,
uint32_t NumBits);
+ /// Build and insert
+ /// a, b, ..., x = G_UNMERGE_VALUES \p Op0
+ /// \p Res = G_BUILD_VECTOR a, b, ..., x, undef, ..., undef
+ ///
+ /// Pad \p Op0 with undef elements to match the number of elements in \p Res.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res and \p Op0 must be generic virtual registers with vector type,
+ /// the same vector element type, and Op0 must have fewer elements than Res.
+ ///
+ /// \return a MachineInstrBuilder for the newly created build vector instr.
+ MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res,
+ const SrcOp &Op0);
+
+ /// Build and insert
+ /// a, b, ..., x, y, z = G_UNMERGE_VALUES \p Op0
+ /// \p Res = G_BUILD_VECTOR a, b, ..., x
+ ///
+ /// Delete trailing elements in \p Op0 to match the number of elements in \p Res.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res and \p Op0 must be generic virtual registers with vector type,
+ /// the same vector element type, and Op0 must have more elements than Res.
+ ///
+ /// \return a MachineInstrBuilder for the newly created build vector instr.
+ MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res,
+ const SrcOp &Op0);
+
/// Build and insert \p Res, \p CarryOut = G_UADDO \p Op0, \p Op1
///
/// G_UADDO sets \p Res to \p Op0 + \p Op1 (truncated to the bit width) and
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 4126e2ac7b8ff..8fed79585fe9e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -323,6 +323,11 @@ Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TI
LLVM_READNONE
LLT getLCMType(LLT OrigTy, LLT TargetTy);
+LLVM_READNONE
+/// Return the smallest type that covers both \p OrigTy and \p TargetTy and is
+/// a multiple of \p TargetTy.
+LLT getCoverTy(LLT OrigTy, LLT TargetTy);
+
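
For the v3s16 case handled in CallLowering.cpp below, the expected contrast
with getLCMType is (a sketch; the values follow the doc comment above):

    LLT V3S16 = LLT::fixed_vector(3, 16);
    LLT V2S16 = LLT::fixed_vector(2, 16);
    LLT Lcm   = getLCMType(V3S16, V2S16);  // <6 x s16>, LCM of the bit sizes
    LLT Cover = getCoverTy(V3S16, V2S16);  // <4 x s16>, smallest multiple of
                                           // <2 x s16> covering <3 x s16>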
/// Return a type where the total size is the greatest common divisor of \p
/// OrigTy and \p TargetTy. This will try to either change the number of vector
/// elements, or bitwidth of scalars. The intent is the result type can be used
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 17094a8e44f89..d061664e8c5d1 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -256,7 +256,7 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
LLT PartLLT = MRI.getType(SrcRegs[0]);
// Deal with v3s16 split into v2s16
- LLT LCMTy = getLCMType(LLTy, PartLLT);
+ LLT LCMTy = getCoverTy(LLTy, PartLLT);
if (LCMTy == LLTy) {
// Common case where no padding is needed.
assert(DstRegs.size() == 1);
@@ -267,21 +267,9 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
// widening the original value.
Register UnmergeSrcReg;
if (LCMTy != PartLLT) {
- // e.g. A <3 x s16> value was split to <2 x s16>
- // %register_value0:_(<2 x s16>)
- // %register_value1:_(<2 x s16>)
- // %undef:_(<2 x s16>) = G_IMPLICIT_DEF
- // %concat:_<6 x s16>) = G_CONCAT_VECTORS %reg_value0, %reg_value1, %undef
- // %dst_reg:_(<3 x s16>), %dead:_(<3 x s16>) = G_UNMERGE_VALUES %concat
- const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits();
- Register Undef = B.buildUndef(PartLLT).getReg(0);
-
- // Build vector of undefs.
- SmallVector<Register, 8> WidenedSrcs(NumWide, Undef);
-
- // Replace the first sources with the real registers.
- std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin());
- UnmergeSrcReg = B.buildConcatVectors(LCMTy, WidenedSrcs).getReg(0);
+ assert(DstRegs.size() == 1);
+ return B.buildDeleteTrailingVectorElements(DstRegs[0],
+ B.buildMerge(LCMTy, SrcRegs));
} else {
// We don't need to widen anything if we're extracting a scalar which was
// promoted to a vector e.g. s8 -> v4s8 -> s8
@@ -298,6 +286,8 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
for (int I = DstRegs.size(); I != NumDst; ++I)
PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);
+ if (PadDstRegs.size() == 1)
+ return B.buildDeleteTrailingVectorElements(DstRegs[0], UnmergeSrcReg);
return B.buildUnmerge(PadDstRegs, UnmergeSrcReg);
}
@@ -485,7 +475,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
MachineRegisterInfo &MRI = *B.getMRI();
LLT DstTy = MRI.getType(DstRegs[0]);
- LLT LCMTy = getLCMType(SrcTy, PartTy);
+ LLT LCMTy = getCoverTy(SrcTy, PartTy);
const unsigned DstSize = DstTy.getSizeInBits();
const unsigned SrcSize = SrcTy.getSizeInBits();
@@ -493,7 +483,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
Register UnmergeSrc = SrcReg;
- if (CoveringSize != SrcSize) {
+ if (!LCMTy.isVector() && CoveringSize != SrcSize) {
// For scalars, it's common to be able to use a simple extension.
if (SrcTy.isScalar() && DstTy.isScalar()) {
CoveringSize = alignTo(SrcSize, DstSize);
@@ -510,14 +500,10 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
}
}
- // Unmerge to the original registers and pad with dead defs.
- SmallVector<Register, 8> UnmergeResults(DstRegs.begin(), DstRegs.end());
- for (unsigned Size = DstSize * DstRegs.size(); Size != CoveringSize;
- Size += DstSize) {
- UnmergeResults.push_back(MRI.createGenericVirtualRegister(DstTy));
- }
+ if (LCMTy.isVector() && CoveringSize != SrcSize)
+ UnmergeSrc = B.buildPadVectorWithUndefElements(LCMTy, SrcReg).getReg(0);
- B.buildUnmerge(UnmergeResults, UnmergeSrc);
+ B.buildUnmerge(DstRegs, UnmergeSrc);
}
bool CallLowering::determineAndHandleAssignments(
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index e09cd26eb0c15..6b50fa49c063f 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -176,16 +176,18 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
return true;
}
+ // Perform an irregular split. The leftover is the last element of RegPieces.
if (MainTy.isVector()) {
- unsigned EltSize = MainTy.getScalarSizeInBits();
- if (LeftoverSize % EltSize != 0)
- return false;
- LeftoverTy = LLT::scalarOrVector(
- ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
- } else {
- LeftoverTy = LLT::scalar(LeftoverSize);
+ SmallVector<Register, 8> RegPieces;
+ extractVectorParts(Reg, MainTy.getNumElements(), RegPieces);
+ for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
+ VRegs.push_back(RegPieces[i]);
+ LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
+ LeftoverTy = MRI.getType(LeftoverRegs[0]);
+ return true;
}
+ LeftoverTy = LLT::scalar(LeftoverSize);
// For irregular sizes, extract the individual parts.
for (unsigned I = 0; I != NumParts; ++I) {
Register NewReg = MRI.createGenericVirtualRegister(MainTy);
@@ -203,6 +205,44 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
return true;
}
+void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
+ SmallVectorImpl<Register> &VRegs) {
+ LLT RegTy = MRI.getType(Reg);
+ assert(RegTy.isVector() && "Expected a vector type");
+
+ LLT EltTy = RegTy.getElementType();
+ LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
+ unsigned RegNumElts = RegTy.getNumElements();
+ unsigned LeftoverNumElts = RegNumElts % NumElts;
+ unsigned NumNarrowTyPieces = RegNumElts / NumElts;
+
+ // Perfect split without leftover
+ if (LeftoverNumElts == 0)
+ return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);
+
+ // Irregular split. Provide direct access to all elements for the artifact
+ // combiner by unmerging to individual elements. Then build vectors with
+ // NumElts elements. The remaining element(s) will be used to build the leftover.
+ SmallVector<Register, 8> Elts;
+ extractParts(Reg, EltTy, RegNumElts, Elts);
+
+ unsigned Offset = 0;
+ // Requested sub-vectors of NarrowTy.
+ for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
+ ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
+ VRegs.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
+ }
+
+ // Leftover element(s).
+ if (LeftoverNumElts == 1) {
+ VRegs.push_back(Elts[Offset]);
+ } else {
+ LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy);
+ ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts);
+ VRegs.push_back(MIRBuilder.buildMerge(LeftoverTy, Pieces).getReg(0));
+ }
+}
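
For instance (a sketch of a call from inside LegalizerHelper; V7S32Reg is an
assumed <7 x s32> virtual register):

    // The helper unmerges the value to seven s32 elements, builds three
    // <2 x s32> vectors, and passes the seventh element through directly.
    SmallVector<Register, 8> Pieces;
    extractVectorParts(V7S32Reg, /*NumElts=*/2, Pieces);
    // Pieces now holds { <2 x s32>, <2 x s32>, <2 x s32>, s32 }.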
+
void LegalizerHelper::insertParts(Register DstReg,
LLT ResultTy, LLT PartTy,
ArrayRef<Register> PartRegs,
@@ -223,6 +263,15 @@ void LegalizerHelper::insertParts(Register DstReg,
return;
}
+ // Merge sub-vectors with different number of elements and insert into DstReg.
+ if (ResultTy.isVector()) {
+ assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
+ SmallVector<Register, 8> AllRegs;
+ for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
+ AllRegs.push_back(Reg);
+ return mergeMixedSubvectors(DstReg, AllRegs);
+ }
+
SmallVector<Register> GCDRegs;
LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
@@ -231,6 +280,31 @@ void LegalizerHelper::insertParts(Register DstReg,
buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
}
+void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
+ Register Reg) {
+ LLT Ty = MRI.getType(Reg);
+ SmallVector<Register, 8> RegElts;
+ extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts);
+ Elts.append(RegElts);
+}
+
+/// Merge \p PartRegs with different types into \p DstReg.
+void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
+ ArrayRef<Register> PartRegs) {
+ SmallVector<Register, 8> AllElts;
+ for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
+ appendVectorElts(AllElts, PartRegs[i]);
+
+ Register Leftover = PartRegs[PartRegs.size() - 1];
+ if (MRI.getType(Leftover).isScalar())
+ AllElts.push_back(Leftover);
+ else
+ appendVectorElts(AllElts, Leftover);
+
+ MIRBuilder.buildMerge(DstReg, AllElts);
+}
+
/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
const MachineInstr &MI) {
@@ -916,8 +990,26 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
return Legalized;
}
- case TargetOpcode::G_FREEZE:
- return reduceOperationWidth(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_FREEZE: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ // Should widen scalar first
+ if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
+ return UnableToLegalize;
+
+ auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
+ SmallVector<Register, 8> Parts;
+ for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
+ Parts.push_back(
+ MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
+ }
+
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Parts);
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_ADD:
case TargetOpcode::G_SUB:
case TargetOpcode::G_SADDO:
@@ -1372,37 +1464,17 @@ void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
unsigned OpIdx) {
MachineOperand &MO = MI.getOperand(OpIdx);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
+ Register Dst = MO.getReg();
+ Register DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MO.setReg(DstExt);
+ MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
}
void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
unsigned OpIdx) {
MachineOperand &MO = MI.getOperand(OpIdx);
-
- LLT OldTy = MRI.getType(MO.getReg());
- unsigned OldElts = OldTy.getNumElements();
- unsigned NewElts = MoreTy.getNumElements();
-
- unsigned NumParts = NewElts / OldElts;
-
- // Use concat_vectors if the result is a multiple of the number of elements.
- if (NumParts * OldElts == NewElts) {
- SmallVector<Register, 8> Parts;
- Parts.push_back(MO.getReg());
-
- Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
- for (unsigned I = 1; I != NumParts; ++I)
- Parts.push_back(ImpDef);
-
- auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
- MO.setReg(Concat.getReg(0));
- return;
- }
-
- Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
- Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
- MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
- MO.setReg(MoreReg);
+ MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
}
void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
@@ -3558,20 +3630,81 @@ Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
}
-LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
- MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT LCMTy = getLCMType(DstTy, NarrowTy);
+/// Check that all vector operands have the same number of elements. Other
+/// operands should be listed in \p NonVecOpIndices.
+static bool hasSameNumEltsOnAllVectorOperands(
+ GenericMachineInstr &MI, MachineRegisterInfo &MRI,
+ std::initializer_list<unsigned> NonVecOpIndices) {
+ if (MI.getNumMemOperands() != 0)
+ return false;
- unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
+ LLT VecTy = MRI.getType(MI.getReg(0));
+ if (!VecTy.isVector())
+ return false;
+ unsigned NumElts = VecTy.getNumElements();
- auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
- SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));
+ for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
+ MachineOperand &Op = MI.getOperand(OpIdx);
+ if (!Op.isReg()) {
+ if (!is_contained(NonVecOpIndices, OpIdx))
+ return false;
+ continue;
+ }
- buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
- MI.eraseFromParent();
- return Legalized;
+ LLT Ty = MRI.getType(Op.getReg());
+ if (!Ty.isVector()) {
+ if (!is_contained(NonVecOpIndices, OpIdx))
+ return false;
+ continue;
+ }
+
+ if (Ty.getNumElements() != NumElts)
+ return false;
+ }
+
+ return true;
+}
+
+/// Fill \p DstOps with DstOps that, combined, cover the same number of elements
+/// as \p Ty. These DstOps have either scalar type when \p NumElts is 1 or are
+/// vectors with \p NumElts elements. When Ty.getNumElements() is not a multiple
+/// of \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
+static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
+ unsigned NumElts) {
+ LLT LeftoverTy;
+ assert(Ty.isVector() && "Expected vector type");
+ LLT EltTy = Ty.getElementType();
+ LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
+ int NumParts, NumLeftover;
+ std::tie(NumParts, NumLeftover) =
+ getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
+
+ assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
+ for (int i = 0; i < NumParts; ++i) {
+ DstOps.push_back(NarrowTy);
+ }
+
+ if (LeftoverTy.isValid()) {
+ assert(NumLeftover == 1 && "expected exactly one leftover");
+ DstOps.push_back(LeftoverTy);
+ }
+}
+
+/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
+/// made from \p Op depending on operand type.
+static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
+ MachineOperand &Op) {
+ for (unsigned i = 0; i < N; ++i) {
+ if (Op.isReg())
+ Ops.push_back(Op.getReg());
+ else if (Op.isImm())
+ Ops.push_back(Op.getImm());
+ else if (Op.isPredicate())
+ Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
+ else
+ llvm_unreachable("Unsupported type");
+ }
}
// Handle splitting vector operations which need to have the same number of
@@ -3588,335 +3721,116 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
// s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
- MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
- if (TypeIdx != 0)
- return UnableToLegalize;
-
- const LLT NarrowTy0 = NarrowTyArg;
- const Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT LeftoverTy0;
-
- // All of the operands need to have the same number of elements, so if we can
- // determine a type breakdown for the result type, we can for all of the
- // source types.
- int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
- if (NumParts < 0)
- return UnableToLegalize;
-
- SmallVector<MachineInstrBuilder, 4> NewInsts;
-
- SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
- SmallVector<Register, 4> PartRegs, LeftoverRegs;
-
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
- Register SrcReg = MI.getOperand(I).getReg();
- LLT SrcTyI = MRI.getType(SrcReg);
- const auto NewEC = NarrowTy0.isVector() ? NarrowTy0.getElementCount()
- : ElementCount::getFixed(1);
- LLT NarrowTyI = LLT::scalarOrVector(NewEC, SrcTyI.getScalarType());
- LLT LeftoverTyI;
-
- // Split this operand into the requested typed registers, and any leftover
- // required to reproduce the original type.
- if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
- LeftoverRegs))
- return UnableToLegalize;
-
- if (I == 1) {
- // For the first operand, create an instruction for each part and setup
- // the result.
- for (Register PartReg : PartRegs) {
- Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
- NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
- .addDef(PartDstReg)
- .addUse(PartReg));
- DstRegs.push_back(PartDstReg);
- }
-
- for (Register LeftoverReg : LeftoverRegs) {
- Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
- NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
- .addDef(PartDstReg)
- .addUse(LeftoverReg));
- LeftoverDstRegs.push_back(PartDstReg);
- }
+ GenericMachineInstr &MI, unsigned NumElts,
+ std::initializer_list<unsigned> NonVecOpIndices) {
+ assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
+ "Non-compatible opcode or not specified non-vector operands");
+ unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
+
+ unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
+ unsigned NumDefs = MI.getNumDefs();
+
+ // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
+ // Build instructions with DstOps so that an instruction found by CSE can be
+ // used directly; when building with a vreg destination, CSE instead copies
+ // the found instruction into the given vreg.
+ SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
+ // Output registers will be taken from created instructions.
+ SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
+ for (unsigned i = 0; i < NumDefs; ++i) {
+ makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
+ }
+
+ // Split vector input operands into sub-vectors with NumElts elts + Leftover.
+ // Operands listed in NonVecOpIndices will be used as is without splitting;
+ // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
+ // scalar condition (op 1), immediate in sext_inreg (op 2).
+ SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
+ for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
+ ++UseIdx, ++UseNo) {
+ if (is_contained(NonVecOpIndices, UseIdx)) {
+ broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
+ MI.getOperand(UseIdx));
} else {
- assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
-
- // Add the newly created operand splits to the existing instructions. The
- // odd-sized pieces are ordered after the requested NarrowTyArg sized
- // pieces.
- unsigned InstCount = 0;
- for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
- NewInsts[InstCount++].addUse(PartRegs[J]);
- for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
- NewInsts[InstCount++].addUse(LeftoverRegs[J]);
+ SmallVector<Register, 8> SplitPieces;
+ extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces);
+ for (auto Reg : SplitPieces)
+ InputOpsPieces[UseNo].push_back(Reg);
}
-
- PartRegs.clear();
- LeftoverRegs.clear();
}
- // Insert the newly built operations and rebuild the result register.
- for (auto &MIB : NewInsts)
- MIRBuilder.insertInstr(MIB);
+ unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
- insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
+ // Take i-th piece of each input operand split and build sub-vector/scalar
+ // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
+ for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
+ SmallVector<DstOp, 2> Defs;
+ for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
+ Defs.push_back(OutputOpsPieces[DstNo][i]);
- MI.eraseFromParent();
- return Legalized;
-}
+ SmallVector<SrcOp, 3> Uses;
+ for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
+ Uses.push_back(InputOpsPieces[InputNo][i]);
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- if (TypeIdx != 0)
- return UnableToLegalize;
-
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
-
- LLT NarrowTy0 = NarrowTy;
- LLT NarrowTy1;
- unsigned NumParts;
-
- if (NarrowTy.isVector()) {
- // Uneven breakdown not handled.
- NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
- if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
- return UnableToLegalize;
-
- NarrowTy1 = LLT::vector(NarrowTy.getElementCount(), SrcTy.getElementType());
- } else {
- NumParts = DstTy.getNumElements();
- NarrowTy1 = SrcTy.getElementType();
+ auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
+ for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
+ OutputRegs[DstNo].push_back(I.getReg(DstNo));
}
- SmallVector<Register, 4> SrcRegs, DstRegs;
- extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
-
- for (unsigned I = 0; I < NumParts; ++I) {
- Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
- MachineInstr *NewInst =
- MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});
-
- NewInst->setFlags(MI.getFlags());
- DstRegs.push_back(DstReg);
+ // Merge small outputs into MI's output for each def operand.
+ if (NumLeftovers) {
+ for (unsigned i = 0; i < NumDefs; ++i)
+ mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
+ } else {
+ for (unsigned i = 0; i < NumDefs; ++i)
+ MIRBuilder.buildMerge(MI.getReg(i), OutputRegs[i]);
}
- if (NarrowTy.isVector())
- MIRBuilder.buildConcatVectors(DstReg, DstRegs);
- else
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
-
MI.eraseFromParent();
return Legalized;
}
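
The switch in fewerElementsVector that dispatches here is only partially
quoted below; as a sketch, call sites are expected to pass NonVecOpIndices
as described in the comment above (operand indices per that comment; the
exact cases are outside the quoted hunks):

    switch (MI.getOpcode()) {
    case TargetOpcode::G_ICMP:
    case TargetOpcode::G_FCMP:
      // Operand 1 is the compare predicate: broadcast it, don't split it.
      return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*predicate*/});
    case TargetOpcode::G_SEXT_INREG:
      // Operand 2 is the bit-width immediate: broadcast it, don't split it.
      return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
    }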
LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register Src0Reg = MI.getOperand(2).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(Src0Reg);
-
- unsigned NumParts;
- LLT NarrowTy0, NarrowTy1;
-
- if (TypeIdx == 0) {
- unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
- unsigned OldElts = DstTy.getNumElements();
-
- NarrowTy0 = NarrowTy;
- NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
- NarrowTy1 = NarrowTy.isVector() ? LLT::vector(NarrowTy.getElementCount(),
- SrcTy.getScalarSizeInBits())
- : SrcTy.getElementType();
-
- } else {
- unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
- unsigned OldElts = SrcTy.getNumElements();
-
- NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
- NarrowTy.getNumElements();
- NarrowTy0 =
- LLT::vector(NarrowTy.getElementCount(), DstTy.getScalarSizeInBits());
- NarrowTy1 = NarrowTy;
+LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
+ unsigned NumElts) {
+ unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
+
+ unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
+ unsigned NumDefs = MI.getNumDefs();
+
+ SmallVector<DstOp, 8> OutputOpsPieces;
+ SmallVector<Register, 8> OutputRegs;
+ makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
+
+ // Instructions that perform the register split will be inserted in the basic
+ // block where the register is defined (the basic block is in the next operand).
+ SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
+ for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
+ UseIdx += 2, ++UseNo) {
+ MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
+ MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
+ extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
}
- // FIXME: Don't know how to handle the situation where the small vectors
- // aren't all the same size yet.
- if (NarrowTy1.isVector() &&
- NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
- return UnableToLegalize;
-
- CmpInst::Predicate Pred
- = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
-
- SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
- extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
- extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
+ // Build PHIs with fewer elements.
+ unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
+ MIRBuilder.setInsertPt(*MI.getParent(), MI);
+ for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
+ auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
+ Phi.addDef(
+ MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
+ OutputRegs.push_back(Phi.getReg(0));
- for (unsigned I = 0; I < NumParts; ++I) {
- Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
- DstRegs.push_back(DstReg);
-
- if (MI.getOpcode() == TargetOpcode::G_ICMP)
- MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
- else {
- MachineInstr *NewCmp
- = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
- NewCmp->setFlags(MI.getFlags());
+ for (unsigned j = 0; j < NumInputs / 2; ++j) {
+ Phi.addUse(InputOpsPieces[j][i]);
+ Phi.add(MI.getOperand(1 + j * 2 + 1));
}
}
- if (NarrowTy1.isVector())
- MIRBuilder.buildConcatVectors(DstReg, DstRegs);
- else
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
-
- MI.eraseFromParent();
- return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register CondReg = MI.getOperand(1).getReg();
-
- unsigned NumParts = 0;
- LLT NarrowTy0, NarrowTy1;
-
- LLT DstTy = MRI.getType(DstReg);
- LLT CondTy = MRI.getType(CondReg);
- unsigned Size = DstTy.getSizeInBits();
-
- assert(TypeIdx == 0 || CondTy.isVector());
-
- if (TypeIdx == 0) {
- NarrowTy0 = NarrowTy;
- NarrowTy1 = CondTy;
-
- unsigned NarrowSize = NarrowTy0.getSizeInBits();
- // FIXME: Don't know how to handle the situation where the small vectors
- // aren't all the same size yet.
- if (Size % NarrowSize != 0)
- return UnableToLegalize;
-
- NumParts = Size / NarrowSize;
-
- // Need to break down the condition type
- if (CondTy.isVector()) {
- if (CondTy.getNumElements() == NumParts)
- NarrowTy1 = CondTy.getElementType();
- else
- NarrowTy1 =
- LLT::vector(CondTy.getElementCount().divideCoefficientBy(NumParts),
- CondTy.getScalarSizeInBits());
- }
+ // Merge small outputs into MI's def.
+ if (NumLeftovers) {
+ mergeMixedSubvectors(MI.getReg(0), OutputRegs);
} else {
- NumParts = CondTy.getNumElements();
- if (NarrowTy.isVector()) {
- // TODO: Handle uneven breakdown.
- if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
- return UnableToLegalize;
-
- return UnableToLegalize;
- } else {
- NarrowTy0 = DstTy.getElementType();
- NarrowTy1 = NarrowTy;
- }
- }
-
- SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
- if (CondTy.isVector())
- extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
-
- extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
- extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
-
- for (unsigned i = 0; i < NumParts; ++i) {
- Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
- MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
- Src1Regs[i], Src2Regs[i]);
- DstRegs.push_back(DstReg);
- }
-
- if (NarrowTy0.isVector())
- MIRBuilder.buildConcatVectors(DstReg, DstRegs);
- else
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
-
- MI.eraseFromParent();
- return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- const Register DstReg = MI.getOperand(0).getReg();
- LLT PhiTy = MRI.getType(DstReg);
- LLT LeftoverTy;
-
- // All of the operands need to have the same number of elements, so if we can
- // determine a type breakdown for the result type, we can for all of the
- // source types.
- int NumParts, NumLeftover;
- std::tie(NumParts, NumLeftover)
- = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
- if (NumParts < 0)
- return UnableToLegalize;
-
- SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
- SmallVector<MachineInstrBuilder, 4> NewInsts;
-
- const int TotalNumParts = NumParts + NumLeftover;
-
- // Insert the new phis in the result block first.
- for (int I = 0; I != TotalNumParts; ++I) {
- LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
- Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
- NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
- .addDef(PartDstReg));
- if (I < NumParts)
- DstRegs.push_back(PartDstReg);
- else
- LeftoverDstRegs.push_back(PartDstReg);
- }
-
- MachineBasicBlock *MBB = MI.getParent();
- MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
- insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
-
- SmallVector<Register, 4> PartRegs, LeftoverRegs;
-
- // Insert code to extract the incoming values in each predecessor block.
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
- PartRegs.clear();
- LeftoverRegs.clear();
-
- Register SrcReg = MI.getOperand(I).getReg();
- MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
- MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
-
- LLT Unused;
- if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
- LeftoverRegs))
- return UnableToLegalize;
-
- // Add the newly created operand splits to the existing instructions. The
- // odd-sized pieces are ordered after the requested NarrowTyArg sized
- // pieces.
- for (int J = 0; J != TotalNumParts; ++J) {
- MachineInstrBuilder MIB = NewInsts[J];
- MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
- MIB.addMBB(&OpMBB);
- }
+ MIRBuilder.buildMerge(MI.getReg(0), OutputRegs);
}
MI.eraseFromParent();
@@ -3927,27 +3841,36 @@ LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
unsigned TypeIdx,
LLT NarrowTy) {
- if (TypeIdx != 1)
- return UnableToLegalize;
-
const int NumDst = MI.getNumOperands() - 1;
const Register SrcReg = MI.getOperand(NumDst).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
-
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT SrcTy = MRI.getType(SrcReg);
- // TODO: Create sequence of extracts.
- if (DstTy == NarrowTy)
+ if (TypeIdx != 1 || NarrowTy == DstTy)
return UnableToLegalize;
- LLT GCDTy = getGCDType(SrcTy, NarrowTy);
- if (DstTy == GCDTy) {
- // This would just be a copy of the same unmerge.
- // TODO: Create extracts, pad with undef and create intermediate merges.
+ // Requires compatible types. Otherwise, SrcReg should have been defined by a
+ // merge-like instruction that would get artifact-combined. Most likely, the
+ // instruction that defines SrcReg has to perform more/fewer elements
+ // legalization compatible with NarrowTy.
+ assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
+ assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
+
+ if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
+ (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
return UnableToLegalize;
- }
- auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
+ // This is most likely DstTy (smaller than register size) packed in SrcTy
+ // (larger than register size), and since the unmerge was not combined it will
+ // be lowered to bit sequence extracts from a register. Unpack SrcTy to NarrowTy
+ // (register size) pieces first. Then unpack each of the NarrowTy pieces to DstTy.
+
+ // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
+ //
+ // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
+ // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
+ // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
+ auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
const int NumUnmerge = Unmerge->getNumOperands() - 1;
const int PartsPerUnmerge = NumDst / NumUnmerge;
@@ -3963,90 +3886,88 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
return Legalized;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- Register Result = MI.getOperand(0).getReg();
- Register Overflow = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
-
- LLT SrcTy = MRI.getType(LHS);
- if (!SrcTy.isVector())
- return UnableToLegalize;
-
- LLT ElementType = SrcTy.getElementType();
- LLT OverflowElementTy = MRI.getType(Overflow).getElementType();
- const ElementCount NumResult = SrcTy.getElementCount();
- LLT GCDTy = getGCDType(SrcTy, NarrowTy);
-
- // Unmerge the operands to smaller parts of GCD type.
- auto UnmergeLHS = MIRBuilder.buildUnmerge(GCDTy, LHS);
- auto UnmergeRHS = MIRBuilder.buildUnmerge(GCDTy, RHS);
-
- const int NumOps = UnmergeLHS->getNumOperands() - 1;
- const ElementCount PartsPerUnmerge = NumResult.divideCoefficientBy(NumOps);
- LLT OverflowTy = LLT::scalarOrVector(PartsPerUnmerge, OverflowElementTy);
- LLT ResultTy = LLT::scalarOrVector(PartsPerUnmerge, ElementType);
-
- // Perform the operation over unmerged parts.
- SmallVector<Register, 8> ResultParts;
- SmallVector<Register, 8> OverflowParts;
- for (int I = 0; I != NumOps; ++I) {
- Register Operand1 = UnmergeLHS->getOperand(I).getReg();
- Register Operand2 = UnmergeRHS->getOperand(I).getReg();
- auto PartMul = MIRBuilder.buildInstr(MI.getOpcode(), {ResultTy, OverflowTy},
- {Operand1, Operand2});
- ResultParts.push_back(PartMul->getOperand(0).getReg());
- OverflowParts.push_back(PartMul->getOperand(1).getReg());
- }
-
- LLT ResultLCMTy = buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ResultParts);
- LLT OverflowLCMTy =
- LLT::scalarOrVector(ResultLCMTy.getElementCount(), OverflowElementTy);
-
- // Recombine the pieces to the original result and overflow registers.
- buildWidenedRemergeToDst(Result, ResultLCMTy, ResultParts);
- buildWidenedRemergeToDst(Overflow, OverflowLCMTy, OverflowParts);
- MI.eraseFromParent();
- return Legalized;
-}
-
-// Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces
-// a vector
-//
-// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
-// undef as necessary.
-//
-// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
-// -> <2 x s16>
-//
-// %4:_(s16) = G_IMPLICIT_DEF
-// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
-// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
-// %7:_(<2 x s16>) = G_IMPLICIT_DEF
-// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
-// %3:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %8
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
- LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
+ // Requires compatible types. Otherwise, the user of DstReg did not perform the
+ // unmerge that should have been artifact-combined. Most likely, the instruction
+ // that uses DstReg has to do more/fewer elements legalization compatible with
+ // NarrowTy.
+ assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
+ assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
+ if (NarrowTy == SrcTy)
+ return UnableToLegalize;
- // Break into a common type
- SmallVector<Register, 16> Parts;
- for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
- extractGCDType(Parts, GCDTy, MO.getReg());
+ // This attempts to lower part of an LCMTy merge/unmerge sequence. The intended
+ // use is for old mir tests. Since the changes to more/fewer elements, it should
+ // no longer be possible to generate MIR like this when starting from llvm-ir,
+ // because the LCMTy approach was replaced with merge/unmerge to vector elements.
+ if (TypeIdx == 1) {
+ assert(SrcTy.isVector() && "Expected vector types");
+ assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
+ if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
+ (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
+ return UnableToLegalize;
+ // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
+ //
+ // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
+ // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
+ // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
+ // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
+ // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
+ // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
+
+ SmallVector<Register, 8> Elts;
+ LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
+ for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
+ auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
+ for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
+ Elts.push_back(Unmerge.getReg(j));
+ }
- // Build the requested new merge, padding with undef.
- LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
- TargetOpcode::G_ANYEXT);
+ SmallVector<Register, 8> NarrowTyElts;
+ unsigned NumNarrowTyElts = NarrowTy.getNumElements();
+ unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
+ for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
+ ++i, Offset += NumNarrowTyElts) {
+ ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
+ NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
+ }
- // Pack into the original result register.
- buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
+ MIRBuilder.buildMerge(DstReg, NarrowTyElts);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ assert(TypeIdx == 0 && "Bad type index");
+ if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
+ (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
+ return UnableToLegalize;
+ // This is most likely SrcTy (smaller than register size) packed in DstTy
+ // (larger than register size), and since the merge was not combined it will
+ // be lowered to bit sequence packing into a register. Merge SrcTy to NarrowTy
+ // (register size) pieces first. Then merge each of the NarrowTy pieces to DstTy.
+
+ // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
+ //
+ // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
+ // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
+ // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
+ SmallVector<Register, 8> NarrowTyElts;
+ unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
+ unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
+ unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
+ for (unsigned i = 0; i < NumParts; ++i) {
+ SmallVector<Register, 8> Sources;
+ for (unsigned j = 0; j < NumElts; ++j)
+ Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
+ NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Sources).getReg(0));
+ }
+
+ MIRBuilder.buildMerge(DstReg, NarrowTyElts);
MI.eraseFromParent();
return Legalized;
}
@@ -4217,164 +4138,15 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
return Legalized;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx,
- LLT NarrowTy) {
- assert(TypeIdx == 0 && "only one type index expected");
-
- const unsigned Opc = MI.getOpcode();
- const int NumDefOps = MI.getNumExplicitDefs();
- const int NumSrcOps = MI.getNumOperands() - NumDefOps;
- const unsigned Flags = MI.getFlags();
- const unsigned NarrowSize = NarrowTy.getSizeInBits();
- const LLT NarrowScalarTy = LLT::scalar(NarrowSize);
-
- assert(MI.getNumOperands() <= 4 && "expected instruction with either 1 "
- "result and 1-3 sources or 2 results and "
- "1-2 sources");
-
- SmallVector<Register, 2> DstRegs;
- for (int I = 0; I < NumDefOps; ++I)
- DstRegs.push_back(MI.getOperand(I).getReg());
-
- // First of all check whether we are narrowing (changing the element type)
- // or reducing the vector elements
- const LLT DstTy = MRI.getType(DstRegs[0]);
- const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();
-
- SmallVector<Register, 8> ExtractedRegs[3];
- SmallVector<Register, 8> Parts;
-
- // Break down all the sources into NarrowTy pieces we can operate on. This may
- // involve creating merges to a wider type, padded with undef.
- for (int I = 0; I != NumSrcOps; ++I) {
- Register SrcReg = MI.getOperand(I + NumDefOps).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
-
- // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
- // For fewerElements, this is a smaller vector with the same element type.
- LLT OpNarrowTy;
- if (IsNarrow) {
- OpNarrowTy = NarrowScalarTy;
-
- // In case of narrowing, we need to cast vectors to scalars for this to
- // work properly
- // FIXME: Can we do without the bitcast here if we're narrowing?
- if (SrcTy.isVector()) {
- SrcTy = LLT::scalar(SrcTy.getSizeInBits());
- SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
- }
- } else {
- auto NarrowEC = NarrowTy.isVector() ? NarrowTy.getElementCount()
- : ElementCount::getFixed(1);
- OpNarrowTy = LLT::scalarOrVector(NarrowEC, SrcTy.getScalarType());
- }
-
- LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);
-
- // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
- buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
- TargetOpcode::G_ANYEXT);
- }
-
- SmallVector<Register, 8> ResultRegs[2];
-
- // Input operands for each sub-instruction.
- SmallVector<SrcOp, 4> InputRegs(NumSrcOps, Register());
-
- int NumParts = ExtractedRegs[0].size();
- const unsigned DstSize = DstTy.getSizeInBits();
- const LLT DstScalarTy = LLT::scalar(DstSize);
-
- // Narrowing needs to use scalar types
- LLT DstLCMTy, NarrowDstTy;
- if (IsNarrow) {
- DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
- NarrowDstTy = NarrowScalarTy;
- } else {
- DstLCMTy = getLCMType(DstTy, NarrowTy);
- NarrowDstTy = NarrowTy;
- }
-
- // We widened the source registers to satisfy merge/unmerge size
- // constraints. We'll have some extra fully undef parts.
- const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
-
- for (int I = 0; I != NumRealParts; ++I) {
- // Emit this instruction on each of the split pieces.
- for (int J = 0; J != NumSrcOps; ++J)
- InputRegs[J] = ExtractedRegs[J][I];
-
- MachineInstrBuilder Inst;
- if (NumDefOps == 1)
- Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
- else
- Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy, NarrowDstTy}, InputRegs,
- Flags);
-
- for (int J = 0; J != NumDefOps; ++J)
- ResultRegs[J].push_back(Inst.getReg(J));
- }
-
- // Fill out the widened result with undef instead of creating instructions
- // with undef inputs.
- int NumUndefParts = NumParts - NumRealParts;
- if (NumUndefParts != 0) {
- Register Undef = MIRBuilder.buildUndef(NarrowDstTy).getReg(0);
- for (int I = 0; I != NumDefOps; ++I)
- ResultRegs[I].append(NumUndefParts, Undef);
- }
-
- // Extract the possibly padded result. Use a scratch register if we need to do
- // a final bitcast, otherwise use the original result register.
- Register MergeDstReg;
- for (int I = 0; I != NumDefOps; ++I) {
- if (IsNarrow && DstTy.isVector())
- MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
- else
- MergeDstReg = DstRegs[I];
-
- buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs[I]);
-
- // Recast to vector if we narrowed a vector
- if (IsNarrow && DstTy.isVector())
- MIRBuilder.buildBitcast(DstRegs[I], MergeDstReg);
- }
-
- MI.eraseFromParent();
- return Legalized;
-}
-
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- int64_t Imm = MI.getOperand(2).getImm();
-
- LLT DstTy = MRI.getType(DstReg);
-
- SmallVector<Register, 8> Parts;
- LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
- LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
-
- for (Register &R : Parts)
- R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);
-
- buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
-
- MI.eraseFromParent();
- return Legalized;
-}
-
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
using namespace TargetOpcode;
+ GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
+ unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
switch (MI.getOpcode()) {
case G_IMPLICIT_DEF:
- return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
case G_TRUNC:
case G_AND:
case G_OR:
@@ -4439,10 +4211,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_SSUBSAT:
case G_UADDSAT:
case G_USUBSAT:
- return reduceOperationWidth(MI, TypeIdx, NarrowTy);
case G_UMULO:
case G_SMULO:
- return fewerElementsVectorMulo(MI, TypeIdx, NarrowTy);
case G_SHL:
case G_LSHR:
case G_ASHR:
@@ -4454,7 +4224,6 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_CTTZ_ZERO_UNDEF:
case G_CTPOP:
case G_FCOPYSIGN:
- return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
case G_ZEXT:
case G_SEXT:
case G_ANYEXT:
@@ -4467,14 +4236,16 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_INTTOPTR:
case G_PTRTOINT:
case G_ADDRSPACE_CAST:
- return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
+ return fewerElementsVectorMultiEltType(GMI, NumElts);
case G_ICMP:
case G_FCMP:
- return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
+ return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
case G_SELECT:
- return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
+ if (MRI.getType(MI.getOperand(1).getReg()).isVector())
+ return fewerElementsVectorMultiEltType(GMI, NumElts);
+ return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
case G_PHI:
- return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
+ return fewerElementsVectorPhi(GMI, NumElts);
case G_UNMERGE_VALUES:
return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
case G_BUILD_VECTOR:
@@ -4491,7 +4262,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_STORE:
return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
case G_SEXT_INREG:
- return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
+ return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
GISEL_VECREDUCE_CASES_NONSEQ
return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
case G_SHUFFLE_VECTOR:
@@ -5053,6 +4824,15 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_AND:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR:
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_UADDSAT:
+ case TargetOpcode::G_USUBSAT:
+ case TargetOpcode::G_SADDSAT:
+ case TargetOpcode::G_SSUBSAT:
case TargetOpcode::G_SMIN:
case TargetOpcode::G_SMAX:
case TargetOpcode::G_UMIN:
@@ -5070,6 +4850,17 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_FMA:
+ case TargetOpcode::G_FSHR:
+ case TargetOpcode::G_FSHL: {
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 1);
+ moreElementsVectorSrc(MI, MoreTy, 2);
+ moreElementsVectorSrc(MI, MoreTy, 3);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
case TargetOpcode::G_EXTRACT:
if (TypeIdx != 1)
return UnableToLegalize;
@@ -5079,6 +4870,11 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
return Legalized;
case TargetOpcode::G_INSERT:
case TargetOpcode::G_FREEZE:
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_BSWAP:
+ case TargetOpcode::G_FCANONICALIZE:
+ case TargetOpcode::G_SEXT_INREG:
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
@@ -5098,30 +4894,34 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
- case TargetOpcode::G_UNMERGE_VALUES: {
- if (TypeIdx != 1)
- return UnableToLegalize;
-
- LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
- int NumDst = MI.getNumOperands() - 1;
- moreElementsVectorSrc(MI, MoreTy, NumDst);
-
- auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
- for (int I = 0; I != NumDst; ++I)
- MIB.addDef(MI.getOperand(I).getReg());
+ case TargetOpcode::G_UNMERGE_VALUES:
+ return UnableToLegalize;
+ case TargetOpcode::G_PHI:
+ return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
+ case TargetOpcode::G_SHUFFLE_VECTOR:
+ return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
+ case TargetOpcode::G_BUILD_VECTOR: {
+ SmallVector<SrcOp, 8> Elts;
+ for (auto Op : MI.uses()) {
+ Elts.push_back(Op.getReg());
+ }
- int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
- for (int I = NumDst; I != NewNumDst; ++I)
- MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
+ for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
+ Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
+ }
- MIB.addUse(MI.getOperand(NumDst).getReg());
+ MIRBuilder.buildDeleteTrailingVectorElements(
+ MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
MI.eraseFromParent();
return Legalized;
}
- case TargetOpcode::G_PHI:
- return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
- case TargetOpcode::G_SHUFFLE_VECTOR:
- return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
+ case TargetOpcode::G_TRUNC: {
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 1);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
default:
return UnableToLegalize;
}
@@ -6778,6 +6578,24 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
LLT VecTy = MRI.getType(SrcVec);
LLT EltTy = VecTy.getElementType();
+ unsigned NumElts = VecTy.getNumElements();
+
+ int64_t IdxVal;
+ if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
+ SmallVector<Register, 8> SrcRegs;
+ extractParts(SrcVec, EltTy, NumElts, SrcRegs);
+
+ if (InsertVal) {
+ SrcRegs[IdxVal] = MI.getOperand(2).getReg();
+ MIRBuilder.buildMerge(DstReg, SrcRegs);
+ } else {
+ MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
if (!EltTy.isByteSized()) { // Not implemented.
LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
return UnableToLegalize;
@@ -6796,7 +6614,6 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
// if the index is out of bounds.
Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
- int64_t IdxVal;
if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
int64_t Offset = IdxVal * EltBytes;
PtrInfo = PtrInfo.getWithOffset(Offset);
@@ -6923,6 +6740,32 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) {
LLT DstTy = MRI.getType(Dst);
LLT SrcTy = MRI.getType(Src);
+ // Extract a sub-vector or a single element.
+ if (SrcTy.isVector()) {
+ unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
+ unsigned DstSize = DstTy.getSizeInBits();
+
+ if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
+ (Offset + DstSize <= SrcTy.getSizeInBits())) {
+ // Unmerge and allow access to each Src element for the artifact combiner.
+ auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), Src);
+
+ // Take the element(s) we need to extract and copy them (merge if there
+ // is more than one).
+ SmallVector<Register, 8> SubVectorElts;
+ for (unsigned Idx = Offset / SrcEltSize;
+ Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
+ SubVectorElts.push_back(Unmerge.getReg(Idx));
+ }
+ if (SubVectorElts.size() == 1)
+ MIRBuilder.buildCopy(Dst, SubVectorElts[0]);
+ else
+ MIRBuilder.buildMerge(Dst, SubVectorElts);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+
if (DstTy.isScalar() &&
(SrcTy.isScalar() ||
(SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
@@ -6956,6 +6799,45 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
LLT DstTy = MRI.getType(Src);
LLT InsertTy = MRI.getType(InsertSrc);
+ // Insert a sub-vector or a single element.
+ if (DstTy.isVector() && !InsertTy.isPointer()) {
+ LLT EltTy = DstTy.getElementType();
+ unsigned EltSize = EltTy.getSizeInBits();
+ unsigned InsertSize = InsertTy.getSizeInBits();
+
+ if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
+ (Offset + InsertSize <= DstTy.getSizeInBits())) {
+ auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
+ SmallVector<Register, 8> DstElts;
+ unsigned Idx = 0;
+ // Elements from Src before the insert offset.
+ for (; Idx < Offset / EltSize; ++Idx) {
+ DstElts.push_back(UnmergeSrc.getReg(Idx));
+ }
+
+ // Replace elements in Src with elements from InsertSrc
+ if (InsertTy.getSizeInBits() > EltSize) {
+ auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
+ for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
+ ++Idx, ++i) {
+ DstElts.push_back(UnmergeInsertSrc.getReg(i));
+ }
+ } else {
+ DstElts.push_back(InsertSrc);
+ ++Idx;
+ }
+
+ // Remaining elements from Src after insert
+ for (; Idx < DstTy.getNumElements(); ++Idx) {
+ DstElts.push_back(UnmergeSrc.getReg(Idx));
+ }
+
+ MIRBuilder.buildMerge(Dst, DstElts);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+
if (InsertTy.isVector() ||
(DstTy.isVector() && DstTy.getElementType() != InsertTy))
return UnableToLegalize;
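For illustration, a minimal sketch of the unmerge/merge lowering style that
lowerExtract and lowerInsert above now share. The wrapper function, register
names, and the <4 x s32>/<2 x s32> types are assumptions made for this note,
not part of the patch:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Hypothetical: insert a <2 x s32> value into a <4 x s32> value at bit
// offset 32, i.e. replace elements 1 and 2. Assumes B has a valid
// insertion point and the registers have the stated types.
static void insertSubVectorSketch(MachineIRBuilder &B, Register Dst,
                                  Register Src, Register InsertSrc) {
  LLT S32 = LLT::scalar(32);
  // Unmerge both inputs to individual elements so a later artifact
  // combine can reach each element directly.
  auto UnmergeSrc = B.buildUnmerge(S32, Src);       // defines 4 x s32
  auto UnmergeIns = B.buildUnmerge(S32, InsertSrc); // defines 2 x s32
  // Elements 0 and 3 come from Src, elements 1 and 2 from InsertSrc.
  B.buildMerge(Dst, {UnmergeSrc.getReg(0), UnmergeIns.getReg(0),
                     UnmergeIns.getReg(1), UnmergeSrc.getReg(3)});
}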
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index fb5ed35c1f726..391251886fbb2 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -215,6 +215,48 @@ MachineInstrBuilder MachineIRBuilder::buildMaskLowPtrBits(const DstOp &Res,
return buildPtrMask(Res, Op0, MaskReg);
}
+MachineInstrBuilder
+MachineIRBuilder::buildPadVectorWithUndefElements(const DstOp &Res,
+ const SrcOp &Op0) {
+ LLT ResTy = Res.getLLTTy(*getMRI());
+ LLT Op0Ty = Op0.getLLTTy(*getMRI());
+
+ assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type");
+ assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
+ "Different vector element types");
+ assert((ResTy.getNumElements() > Op0Ty.getNumElements()) &&
+ "Op0 has more elements");
+
+ auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
+ SmallVector<Register, 8> Regs;
+ for (auto Op : Unmerge.getInstr()->defs())
+ Regs.push_back(Op.getReg());
+ Register Undef = buildUndef(Op0Ty.getElementType()).getReg(0);
+ unsigned NumberOfPadElts = ResTy.getNumElements() - Regs.size();
+ for (unsigned i = 0; i < NumberOfPadElts; ++i)
+ Regs.push_back(Undef);
+ return buildMerge(Res, Regs);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildDeleteTrailingVectorElements(const DstOp &Res,
+ const SrcOp &Op0) {
+ LLT ResTy = Res.getLLTTy(*getMRI());
+ LLT Op0Ty = Op0.getLLTTy(*getMRI());
+
+ assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type");
+ assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
+ "Different vector element types");
+ assert((ResTy.getNumElements() < Op0Ty.getNumElements()) &&
+ "Op0 has fewer elements");
+
+ SmallVector<Register, 8> Regs;
+ auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
+ for (unsigned i = 0; i < ResTy.getNumElements(); ++i)
+ Regs.push_back(Unmerge.getReg(i));
+ return buildMerge(Res, Regs);
+}
+
MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
return buildInstr(TargetOpcode::G_BR).addMBB(&Dest);
}
@@ -613,10 +655,8 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res,
MachineInstrBuilder MachineIRBuilder::buildUnmerge(LLT Res,
const SrcOp &Op) {
unsigned NumReg = Op.getLLTTy(*getMRI()).getSizeInBits() / Res.getSizeInBits();
- SmallVector<Register, 8> TmpVec;
- for (unsigned I = 0; I != NumReg; ++I)
- TmpVec.push_back(getMRI()->createGenericVirtualRegister(Res));
- return buildUnmerge(TmpVec, Op);
+ SmallVector<DstOp, 8> TmpVec(NumReg, Res);
+ return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
}
MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<Register> Res,
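A short usage sketch for the two builder helpers added above. The wrapper
function and register names are hypothetical; B is assumed to be a
MachineIRBuilder with a valid insertion point, V3 a <3 x s16> register and
V4 a <4 x s16> register:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

static void padAndTrimSketch(MachineIRBuilder &B, Register V3, Register V4) {
  LLT V3S16 = LLT::fixed_vector(3, 16);
  LLT V4S16 = LLT::fixed_vector(4, 16);
  // <3 x s16> -> <4 x s16>: unmerge to s16 elements, append one undef
  // element, then merge back together.
  Register Widened = B.buildPadVectorWithUndefElements(V4S16, V3).getReg(0);
  // <4 x s16> -> <3 x s16>: unmerge to s16 elements, keep the first three.
  Register Trimmed = B.buildDeleteTrailingVectorElements(V3S16, V4).getReg(0);
  (void)Widened;
  (void)Trimmed;
}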
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 00ee2988d17fe..4981a537dc7c0 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -923,6 +923,21 @@ LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) {
return LLT::scalar(LCMSize);
}
+LLT llvm::getCoverTy(LLT OrigTy, LLT TargetTy) {
+ if (!OrigTy.isVector() || !TargetTy.isVector() || OrigTy == TargetTy ||
+ (OrigTy.getScalarSizeInBits() != TargetTy.getScalarSizeInBits()))
+ return getLCMType(OrigTy, TargetTy);
+
+ unsigned OrigTyNumElts = OrigTy.getNumElements();
+ unsigned TargetTyNumElts = TargetTy.getNumElements();
+ if (OrigTyNumElts % TargetTyNumElts == 0)
+ return OrigTy;
+
+ unsigned NumElts = alignTo(OrigTyNumElts, TargetTyNumElts);
+ return LLT::scalarOrVector(ElementCount::getFixed(NumElts),
+ OrigTy.getElementType());
+}
+
LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) {
const unsigned OrigSize = OrigTy.getSizeInBits();
const unsigned TargetSize = TargetTy.getSizeInBits();
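To make the intent of getCoverTy concrete, a hedged example of the values it
should return, assuming the declaration this patch adds alongside getLCMType
in GlobalISel/Utils.h; the types are chosen for this note, not taken from
the tests:

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include <cassert>
using namespace llvm;

static void coverTySketch() {
  LLT V2S16 = LLT::fixed_vector(2, 16);
  // <3 x s16> does not split evenly into <2 x s16> pieces, so the element
  // count is rounded up to the next multiple: <4 x s16>.
  assert(getCoverTy(LLT::fixed_vector(3, 16), V2S16) ==
         LLT::fixed_vector(4, 16));
  // <4 x s16> splits evenly into <2 x s16> pieces and is returned as is.
  assert(getCoverTy(LLT::fixed_vector(4, 16), V2S16) ==
         LLT::fixed_vector(4, 16));
}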
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 7fd1aa3044fee..5046daaed9777 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -533,7 +533,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
.legalFor({S32, S16, V2S16})
.minScalar(0, S16)
- .clampMaxNumElements(0, S16, 2)
+ .clampMaxNumElementsStrict(0, S16, 2)
.widenScalarToNextMultipleOf(0, 32)
.maxScalar(0, S32)
.scalarize(0);
@@ -541,7 +541,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT, G_SADDSAT, G_SSUBSAT})
.legalFor({S32, S16, V2S16}) // Clamp modifier
.minScalarOrElt(0, S16)
- .clampMaxNumElements(0, S16, 2)
+ .clampMaxNumElementsStrict(0, S16, 2)
.scalarize(0)
.widenScalarToNextPow2(0, 32)
.lower();
@@ -712,7 +712,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
}
if (ST.hasVOP3PInsts())
- FPOpActions.clampMaxNumElements(0, S16, 2);
+ FPOpActions.clampMaxNumElementsStrict(0, S16, 2);
FPOpActions
.scalarize(0)
@@ -728,7 +728,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder({G_FNEG, G_FABS})
.legalFor(FPTypesPK16)
- .clampMaxNumElements(0, S16, 2)
+ .clampMaxNumElementsStrict(0, S16, 2)
.scalarize(0)
.clampScalar(0, S16, S64);
@@ -965,7 +965,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (ST.has16BitInsts()) {
getActionDefinitionsBuilder(G_BSWAP)
.legalFor({S16, S32, V2S16})
- .clampMaxNumElements(0, S16, 2)
+ .clampMaxNumElementsStrict(0, S16, 2)
// FIXME: Fixing non-power-of-2 before clamp is workaround for
// narrowScalar limitation.
.widenScalarToNextPow2(0)
@@ -1425,6 +1425,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// FIXME: Doesn't handle extract of illegal sizes.
getActionDefinitionsBuilder(Op)
.lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32)))
+ .lowerIf([=](const LegalityQuery &Query) {
+ // Sub-vector (or single element) insert and extract.
+ // TODO: verify immediate offset here since lower only works with
+ // whole elements.
+ const LLT BigTy = Query.Types[BigTyIdx];
+ return BigTy.isVector();
+ })
// FIXME: Multiples of 16 should not be legal.
.legalIf([=](const LegalityQuery &Query) {
const LLT BigTy = Query.Types[BigTyIdx];
@@ -1583,7 +1590,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// Prefer to reduce vector widths for 16-bit vectors before lowering, to
// get more vector shift opportunities, since we'll get those when
// expanded.
- .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16));
+ .clampMaxNumElementsStrict(0, S16, 2);
} else if (ST.has16BitInsts()) {
SextInReg.lowerFor({{S32}, {S64}, {S16}});
} else {
@@ -1605,14 +1612,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder(G_FSHR)
.legalFor({{S32, S32}})
.lowerFor({{V2S16, V2S16}})
- .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16))
+ .clampMaxNumElementsStrict(0, S16, 2)
.scalarize(0)
.lower();
if (ST.hasVOP3PInsts()) {
getActionDefinitionsBuilder(G_FSHL)
.lowerFor({{V2S16, V2S16}})
- .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16))
+ .clampMaxNumElementsStrict(0, S16, 2)
.scalarize(0)
.lower();
} else {
@@ -2535,10 +2542,8 @@ bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper,
} else {
// For cases where the widened type isn't a nice register value, unmerge
// from a widened register (e.g. <3 x s16> -> <4 x s16>)
- B.setInsertPt(B.getMBB(), ++B.getInsertPt());
- WideLoad = Helper.widenWithUnmerge(WideTy, ValReg);
- B.setInsertPt(B.getMBB(), MI.getIterator());
- B.buildLoadFromOffset(WideLoad, PtrReg, *MMO, 0);
+ WideLoad = B.buildLoadFromOffset(WideTy, PtrReg, *MMO, 0).getReg(0);
+ B.buildDeleteTrailingVectorElements(ValReg, WideLoad);
}
}
@@ -3811,6 +3816,10 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
llvm_unreachable("invalid data type");
}
+ if (StoreVT == LLT::fixed_vector(3, S16)) {
+ Reg = B.buildPadVectorWithUndefElements(LLT::fixed_vector(4, S16), Reg)
+ .getReg(0);
+ }
return Reg;
}
@@ -4653,9 +4662,23 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
// Deal with the one annoying legal case.
const LLT V3S16 = LLT::fixed_vector(3, 16);
if (Ty == V3S16) {
- padWithUndef(ResTy, RegsToCover - ResultRegs.size() + 1);
- auto Concat = B.buildConcatVectors(LLT::fixed_vector(6, 16), ResultRegs);
- B.buildUnmerge({DstReg, MRI->createGenericVirtualRegister(V3S16)}, Concat);
+ if (IsTFE) {
+ if (ResultRegs.size() == 1) {
+ NewResultReg = ResultRegs[0];
+ } else if (ResultRegs.size() == 2) {
+ LLT V4S16 = LLT::fixed_vector(4, 16);
+ NewResultReg = B.buildConcatVectors(V4S16, ResultRegs).getReg(0);
+ } else {
+ return false;
+ }
+ }
+
+ if (MRI->getType(DstReg).getNumElements() <
+ MRI->getType(NewResultReg).getNumElements()) {
+ B.buildDeleteTrailingVectorElements(DstReg, NewResultReg);
+ } else {
+ B.buildPadVectorWithUndefElements(DstReg, NewResultReg);
+ }
return true;
}
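The shape of the clampMaxNumElementsStrict substitutions above, written out
as a hypothetical rule set; the opcode and types here are illustrative only,
and the real uses are the builder chains in this file:

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
using namespace llvm;

static void declareRulesSketch(LegalizerInfo &LI) {
  const LLT S16 = LLT::scalar(16);
  const LLT V2S16 = LLT::fixed_vector(2, 16);
  LI.getActionDefinitionsBuilder(TargetOpcode::G_AND) // opcode is illustrative
      .legalFor({V2S16})
      // Clamp wide s16 vectors to exactly <2 x s16>. The strict variant
      // does more elements first, so the split never leaves a leftover
      // piece of a different type.
      .clampMaxNumElementsStrict(0, S16, 2)
      .scalarize(0);
}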
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 2bb77d9de0b27..c60012bcfe2e8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1162,18 +1162,25 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(MachineInstr &MI,
// 96-bit loads are only available for vector loads. We need to split this
// into a 64-bit part, and 32 (unless we can widen to a 128-bit load).
if (MMO->getAlign() < Align(16)) {
+ MachineFunction *MF = MI.getParent()->getParent();
+ ApplyRegBankMapping ApplyBank(*this, MRI, DstBank);
+ MachineIRBuilder B(MI, ApplyBank);
+ LegalizerHelper Helper(*MF, ApplyBank, B);
LLT Part64, Part32;
std::tie(Part64, Part32) = splitUnequalType(LoadTy, 64);
- auto Load0 = B.buildLoadFromOffset(Part64, PtrReg, *MMO, 0);
- auto Load1 = B.buildLoadFromOffset(Part32, PtrReg, *MMO, 8);
-
- auto Undef = B.buildUndef(LoadTy);
- auto Ins0 = B.buildInsert(LoadTy, Undef, Load0, 0);
- B.buildInsert(MI.getOperand(0), Ins0, Load1, 64);
+ if (Helper.reduceLoadStoreWidth(cast<GAnyLoad>(MI), 0, Part64) !=
+ LegalizerHelper::Legalized)
+ return false;
+ return true;
} else {
LLT WiderTy = widen96To128(LoadTy);
auto WideLoad = B.buildLoadFromOffset(WiderTy, PtrReg, *MMO, 0);
- B.buildExtract(MI.getOperand(0), WideLoad, 0);
+ if (WiderTy.isScalar())
+ B.buildTrunc(MI.getOperand(0), WideLoad);
+ else {
+ B.buildDeleteTrailingVectorElements(MI.getOperand(0).getReg(),
+ WideLoad);
+ }
}
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
index 2da155797fa9e..13a6fe72c2f0c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -12,14 +12,6 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--"
; BIG-ENDIAN: unable to translate in big endian mode
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(<28 x s32>) = G_CONCAT_VECTORS %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>) (in function: odd_vector)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_vector
-; FALLBACK-WITH-REPORT-OUT-LABEL: odd_vector:
-define void @odd_vector(<7 x i32>* %addr) {
- %vec = load <7 x i32>, <7 x i32>* %addr
- store <7 x i32> %vec, <7 x i32>* %addr
- ret void
-}
; Make sure we don't mess up metadata arguments.
declare void @llvm.write_register.i64(metadata, i64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll
index 988de92eeacb9..6c2d5e6967bae 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll
@@ -58,9 +58,8 @@ define void @test_return_v3f32() {
; CHECK-NEXT: BL @bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $s0, implicit-def $q0
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[BITCAST]](<4 x s32>), [[DEF1]](<4 x s32>), [[DEF1]](<4 x s32>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: RET_ReallyLR
%call = call <3 x float> @bar(float undef)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir
index 860b5f75b1f83..cfce6fd390227 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir
@@ -229,11 +229,10 @@ body: |
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32)
- ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32)
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8)
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8)
; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[DEF]](s8)
- ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV20]](s8)
+ ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV16]](s8)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32)
; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV2]](s32), [[MV3]](s32)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
index 6ea81e73fc7e2..bf3619c4210ff 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -244,35 +244,37 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $d3
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $d4
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $d5
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY3]](s64)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY4]](s64), [[COPY5]](s64)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64)
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64)
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $d6
; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY $d7
; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.2, align 16)
; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64) from %fixed-stack.3)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY3]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY4]](s64), [[COPY5]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64)
; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY6]](s64), [[COPY7]](s64)
; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s64>), [[C]](s64)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR1]](<2 x s64>), [[C1]](s64)
; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR4]](<2 x s64>), [[C]](s64)
; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<2 x s64>), [[C1]](s64)
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[SHUF]](<2 x s64>)
; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC]](s64), [[EVEC1]](s64)
; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC2]](s64), [[EVEC3]](s64)
- ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3)
+ ; CHECK-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64)
; CHECK-NEXT: G_STORE [[BUILD_VECTOR6]](<2 x s64>), [[COPY8]](p0) :: (store (<2 x s64>), align 64)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C2]](s64)
; CHECK-NEXT: G_STORE [[BUILD_VECTOR7]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C3]](s64)
- ; CHECK-NEXT: G_STORE [[SHUF]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into unknown-address + 32, align 32)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR8]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into unknown-address + 32, align 32)
; CHECK-NEXT: RET_ReallyLR
%3:_(s64) = COPY $d0
%4:_(s64) = COPY $d1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir
index 6c5c2b783a492..10d365f993b2b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir
@@ -229,11 +229,10 @@ body: |
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32)
- ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32)
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8)
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8)
; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[DEF]](s8)
- ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV20]](s8)
+ ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV16]](s8)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32)
; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV2]](s32), [[MV3]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir
index b7ec21b576af4..336521199709e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir
@@ -326,8 +326,13 @@ body: |
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[BUILD_VECTOR]](<2 x s64>), 32
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<2 x s64>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BITCAST]](s128)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C2]](s32)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]]
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64)
%0:_(s64) = G_CONSTANT i64 0
%1:_(s64) = G_CONSTANT i64 1
%2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
index 71eee774acbe7..b0fb33b340611 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
@@ -74,17 +74,13 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[DEF]](s32)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[DEF1]](<2 x s32>)
- ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<3 x s32>), [[UV4:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV3]](<3 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[AND1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s16>) = G_TRUNC %0
%2:_(<3 x s32>) = G_ZEXT %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
index f7bf2b25e95bf..2ec0cb5a66e12 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
@@ -30,12 +30,14 @@ body: |
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
- ; GFX10-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96
- ; GFX10-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64
- ; GFX10-NEXT: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+ ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV6]](s32)
+ ; GFX10-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX10-NEXT: G_STORE [[EXTRACT]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
+ ; GFX10-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
%0:_(p5) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
@@ -62,12 +64,14 @@ body: |
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
- ; GFX10-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96
- ; GFX10-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64
- ; GFX10-NEXT: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+ ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV6]](s32)
+ ; GFX10-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX10-NEXT: G_STORE [[EXTRACT]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
+ ; GFX10-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
%0:_(p5) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll
index 657184620fa6a..a407de876bbfe 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll
@@ -628,23 +628,25 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha
; GFX9-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v6
; GFX9-NEXT: v_lshlrev_b32_e32 v6, 16, v7
-; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v4
-; GFX9-NEXT: s_lshl_b32 s4, s4, 16
; GFX9-NEXT: v_and_or_b32 v2, v2, v9, v6
+; GFX9-NEXT: s_lshl_b32 s4, s4, 16
+; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
+; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX9-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-NEXT: v_and_or_b32 v3, v3, v9, s4
-; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v8
+; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
-; GFX9-NEXT: v_and_or_b32 v2, v4, v9, v2
-; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v0
-; GFX9-NEXT: v_and_or_b32 v3, v5, v9, s4
-; GFX9-NEXT: v_lshlrev_b32_e32 v4, 16, v4
+; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v8
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX9-NEXT: v_and_or_b32 v3, v4, v9, v3
+; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v2
+; GFX9-NEXT: v_pk_add_f16 v0, v3, v0
+; GFX9-NEXT: v_and_or_b32 v4, v5, v9, s4
; GFX9-NEXT: v_and_or_b32 v1, v1, v9, s4
-; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v4
-; GFX9-NEXT: v_pk_add_f16 v1, v3, v1
-; GFX9-NEXT: s_lshl_b32 s4, s4, 16
-; GFX9-NEXT: v_pk_add_f16 v0, v2, v0
+; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX9-NEXT: v_pk_add_f16 v1, v4, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v2
; GFX9-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
@@ -657,18 +659,20 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha
; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX9-CONTRACT-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX9-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v6
-; GFX9-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16
; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v7
-; GFX9-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-CONTRACT-NEXT: v_and_or_b32 v2, v2, v9, v6
-; GFX9-CONTRACT-NEXT: v_and_or_b32 v3, v3, v9, s4
; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v8
-; GFX9-CONTRACT-NEXT: v_and_or_b32 v5, v5, v9, s4
; GFX9-CONTRACT-NEXT: v_and_or_b32 v4, v4, v9, v6
-; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX9-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16
; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX9-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4
+; GFX9-CONTRACT-NEXT: v_and_or_b32 v3, v3, v9, s4
+; GFX9-CONTRACT-NEXT: v_and_or_b32 v5, v5, v9, s4
+; GFX9-CONTRACT-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5
+; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX9-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v2
+; GFX9-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_3xhalf_add_mul_rhs:
@@ -680,23 +684,25 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha
; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v6
; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v7
-; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v8, 16, v4
-; GFX9-DENORM-NEXT: s_lshl_b32 s4, s4, 16
; GFX9-DENORM-NEXT: v_and_or_b32 v2, v2, v9, v6
+; GFX9-DENORM-NEXT: s_lshl_b32 s4, s4, 16
+; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
+; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX9-DENORM-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-DENORM-NEXT: v_and_or_b32 v3, v3, v9, s4
-; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
-; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v8
+; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
-; GFX9-DENORM-NEXT: v_and_or_b32 v2, v4, v9, v2
-; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v4, 16, v0
-; GFX9-DENORM-NEXT: v_and_or_b32 v3, v5, v9, s4
-; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v4, 16, v4
+; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v3, 16, v8
+; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX9-DENORM-NEXT: v_and_or_b32 v3, v4, v9, v3
+; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v2
+; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v3, v0
+; GFX9-DENORM-NEXT: v_and_or_b32 v4, v5, v9, s4
; GFX9-DENORM-NEXT: v_and_or_b32 v1, v1, v9, s4
-; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v4
-; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v3, v1
-; GFX9-DENORM-NEXT: s_lshl_b32 s4, s4, 16
-; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v2, v0
+; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v4, v1
+; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v2
; GFX9-DENORM-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
@@ -709,18 +715,20 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha
; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX9-UNSAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX9-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v6
-; GFX9-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16
; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v7
-; GFX9-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-UNSAFE-NEXT: v_and_or_b32 v2, v2, v9, v6
-; GFX9-UNSAFE-NEXT: v_and_or_b32 v3, v3, v9, s4
; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v8
-; GFX9-UNSAFE-NEXT: v_and_or_b32 v5, v5, v9, s4
; GFX9-UNSAFE-NEXT: v_and_or_b32 v4, v4, v9, v6
-; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX9-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16
; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX9-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4
+; GFX9-UNSAFE-NEXT: v_and_or_b32 v3, v3, v9, s4
+; GFX9-UNSAFE-NEXT: v_and_or_b32 v5, v5, v9, s4
+; GFX9-UNSAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5
+; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX9-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v2
+; GFX9-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_3xhalf_add_mul_rhs:
@@ -734,23 +742,25 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha
; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4
+; GFX10-NEXT: v_and_or_b32 v3, v3, v8, s4
; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v6
; GFX10-NEXT: v_and_or_b32 v2, v2, v8, v7
+; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
-; GFX10-NEXT: v_and_or_b32 v2, v3, v8, s4
-; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v4
+; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v4
+; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4
; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0
-; GFX10-NEXT: v_pk_mul_f16 v1, v1, v2
+; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v6
+; GFX10-NEXT: v_and_or_b32 v2, v4, v8, v2
+; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v6
+; GFX10-NEXT: v_pk_add_f16 v0, v2, v0
; GFX10-NEXT: v_and_or_b32 v2, v5, v8, s4
-; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v6
-; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4
-; GFX10-NEXT: s_lshl_b32 s4, s4, 16
-; GFX10-NEXT: v_and_or_b32 v3, v4, v8, v3
-; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v5
+; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX10-NEXT: v_pk_add_f16 v1, v2, v1
-; GFX10-NEXT: v_pk_add_f16 v0, v3, v0
+; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4
+; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_3xhalf_add_mul_rhs:
@@ -758,23 +768,25 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v6, 16, v0
-; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v7, 0xffff
-; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v8, 16, v2
-; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v9, 16, v4
+; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v7, 16, v2
+; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v8, 16, v4
+; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v9, 0xffff
; GFX10-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16
; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v6
-; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v7, s4
+; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v8, 16, v8
-; GFX10-CONTRACT-NEXT: v_and_or_b32 v3, v3, v7, s4
-; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v9, 16, v9
-; GFX10-CONTRACT-NEXT: v_and_or_b32 v5, v5, v7, s4
-; GFX10-CONTRACT-NEXT: v_and_or_b32 v0, v0, v7, v6
-; GFX10-CONTRACT-NEXT: v_and_or_b32 v2, v2, v7, v8
-; GFX10-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16
-; GFX10-CONTRACT-NEXT: v_and_or_b32 v4, v4, v7, v9
-; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5
+; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4
+; GFX10-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v6
+; GFX10-CONTRACT-NEXT: v_and_or_b32 v2, v2, v9, v7
+; GFX10-CONTRACT-NEXT: v_and_or_b32 v4, v4, v9, v8
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4
-; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v7, s4
+; GFX10-CONTRACT-NEXT: v_and_or_b32 v2, v3, v9, s4
+; GFX10-CONTRACT-NEXT: v_and_or_b32 v4, v5, v9, s4
+; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v2, v4
+; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4
+; GFX10-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v3
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_3xhalf_add_mul_rhs:
@@ -788,23 +800,25 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha
; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4
+; GFX10-DENORM-NEXT: v_and_or_b32 v3, v3, v8, s4
; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v6
; GFX10-DENORM-NEXT: v_and_or_b32 v2, v2, v8, v7
+; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
-; GFX10-DENORM-NEXT: v_and_or_b32 v2, v3, v8, s4
-; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v3, 16, v4
+; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v4
+; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4
; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v6, 16, v0
-; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v2
+; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v6
+; GFX10-DENORM-NEXT: v_and_or_b32 v2, v4, v8, v2
+; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v6
+; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v2, v0
; GFX10-DENORM-NEXT: v_and_or_b32 v2, v5, v8, s4
-; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v5, 16, v6
-; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4
-; GFX10-DENORM-NEXT: s_lshl_b32 s4, s4, 16
-; GFX10-DENORM-NEXT: v_and_or_b32 v3, v4, v8, v3
-; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v5
+; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v2, v1
-; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v3, v0
+; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4
+; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v3
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-UNSAFE-LABEL: test_3xhalf_add_mul_rhs:
@@ -812,23 +826,25 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v0
-; GFX10-UNSAFE-NEXT: v_mov_b32_e32 v7, 0xffff
-; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v2
-; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v9, 16, v4
+; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v2
+; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
+; GFX10-UNSAFE-NEXT: v_mov_b32_e32 v9, 0xffff
; GFX10-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16
; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v6
-; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, v7, s4
+; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v8, 16, v8
-; GFX10-UNSAFE-NEXT: v_and_or_b32 v3, v3, v7, s4
-; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v9, 16, v9
-; GFX10-UNSAFE-NEXT: v_and_or_b32 v5, v5, v7, s4
-; GFX10-UNSAFE-NEXT: v_and_or_b32 v0, v0, v7, v6
-; GFX10-UNSAFE-NEXT: v_and_or_b32 v2, v2, v7, v8
-; GFX10-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16
-; GFX10-UNSAFE-NEXT: v_and_or_b32 v4, v4, v7, v9
-; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5
+; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4
+; GFX10-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v6
+; GFX10-UNSAFE-NEXT: v_and_or_b32 v2, v2, v9, v7
+; GFX10-UNSAFE-NEXT: v_and_or_b32 v4, v4, v9, v8
; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4
-; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, v7, s4
+; GFX10-UNSAFE-NEXT: v_and_or_b32 v2, v3, v9, s4
+; GFX10-UNSAFE-NEXT: v_and_or_b32 v4, v5, v9, s4
+; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v2, v4
+; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4
+; GFX10-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v3
; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
.entry:
%a = fmul <3 x half> %x, %y
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
index 147d60fdac64d..122b5e65df6ee 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
@@ -888,11 +888,12 @@ define <3 x i16> @v3i16_func_void() #0 {
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
- ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16)
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
%val = load <3 x i16>, <3 x i16> addrspace(1)* undef
@@ -942,12 +943,13 @@ define <5 x i16> @v5i16_func_void() #0 {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD1]](<5 x s16>), [[DEF1]](<5 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
- ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD1]](<5 x s16>)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF1]](s16)
+ ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr2 = COPY [[UV7]](<2 x s16>)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
%ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll
index 3d4acd79132de..e78d959f0af57 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll
@@ -14,9 +14,9 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in,
; GCN-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:48
; GCN-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:64
; GCN-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:80
+; GCN-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128
; GCN-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:96
; GCN-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:112
-; GCN-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128
; GCN-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:144
; GCN-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:160
; GCN-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:176
@@ -38,8 +38,6 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in,
; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:208
; GCN-NEXT: s_waitcnt vmcnt(7)
; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:224
-; GCN-NEXT: s_waitcnt vmcnt(7)
-; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:240
; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3]
; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:16
; GCN-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:32
@@ -48,6 +46,8 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in,
; GCN-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:80
; GCN-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:96
; GCN-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:112
+; GCN-NEXT: s_waitcnt vmcnt(15)
+; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:240
; GCN-NEXT: s_endpgm
;
; GFX10-LABEL: v_insert_v64i32_37:
@@ -58,13 +58,13 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in,
; GFX10-NEXT: s_clause 0xf
; GFX10-NEXT: global_load_dwordx4 v[32:35], v64, s[0:1]
; GFX10-NEXT: global_load_dwordx4 v[36:39], v64, s[0:1] offset:16
+; GFX10-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128
; GFX10-NEXT: global_load_dwordx4 v[40:43], v64, s[0:1] offset:32
; GFX10-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:48
; GFX10-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:64
; GFX10-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:80
; GFX10-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:96
; GFX10-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:112
-; GFX10-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128
; GFX10-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:144
; GFX10-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:160
; GFX10-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:176
@@ -84,8 +84,6 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in,
; GFX10-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:192
; GFX10-NEXT: s_waitcnt vmcnt(2)
; GFX10-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:208
-; GFX10-NEXT: s_waitcnt vmcnt(1)
-; GFX10-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:224
; GFX10-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3]
; GFX10-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:16
; GFX10-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:32
@@ -94,6 +92,8 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in,
; GFX10-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:80
; GFX10-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:96
; GFX10-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:112
+; GFX10-NEXT: s_waitcnt vmcnt(1)
+; GFX10-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:224
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:240
; GFX10-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir
index d6d2a74cbcc21..9580520d42164 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir
@@ -198,156 +198,3 @@ body: |
S_ENDPGM 0, implicit %1, implicit %2
...
-
----
-name: extract_sgpr_s32_from_v3s32
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0_sgpr1_sgpr2
- ; CHECK-LABEL: name: extract_sgpr_s32_from_v3s32
- ; CHECK: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2
- ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub2
- ; CHECK: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]]
- %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2
- %1:sgpr(s32) = G_EXTRACT %0, 0
- %2:sgpr(s32) = G_EXTRACT %0, 32
- %3:sgpr(s32) = G_EXTRACT %0, 64
- S_ENDPGM 0, implicit %0, implicit %2, implicit %3
-
-...
-
----
-name: extract_sgpr_v2s32_from_v3s32
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0_sgpr1_sgpr2
- ; CHECK-LABEL: name: extract_sgpr_v2s32_from_v3s32
- ; CHECK: [[COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY $sgpr0_sgpr1_sgpr2
- ; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[COPY]].sub0_sub1
- ; CHECK: S_ENDPGM 0, implicit [[COPY1]]
- %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2
- %1:sgpr(<2 x s32>) = G_EXTRACT %0, 0
- S_ENDPGM 0, implicit %1
-
-...
-
----
-name: extract_sgpr_v3s32_from_v4s32
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-LABEL: name: extract_sgpr_v3s32_from_v4s32
- ; CHECK: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY [[COPY]]
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_96 = COPY [[COPY1]].sub0_sub1_sub2
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_96 = COPY [[COPY]].sub1_sub2_sub3
- ; CHECK: S_ENDPGM 0, implicit [[COPY2]], implicit [[COPY3]]
- %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- %1:sgpr(<3 x s32>) = G_EXTRACT %0, 0
- %2:sgpr(<3 x s32>) = G_EXTRACT %0, 32
- S_ENDPGM 0, implicit %1, implicit %2
-
-...
-
----
-name: extract_sgpr_v2s16_from_v4s16_offset0
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0_sgpr1
- ; CHECK-LABEL: name: extract_sgpr_v2s16_from_v4s16_offset0
- ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; CHECK: S_ENDPGM 0, implicit [[COPY1]]
- %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
- %1:sgpr(<2 x s16>) = G_EXTRACT %0, 0
- S_ENDPGM 0, implicit %1
-
-...
-
----
-name: extract_sgpr_v2s16_from_v4s16_offset32
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0_sgpr1
- ; CHECK-LABEL: name: extract_sgpr_v2s16_from_v4s16_offset32
- ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; CHECK: S_ENDPGM 0, implicit [[COPY1]]
- %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
- %1:sgpr(<2 x s16>) = G_EXTRACT %0, 32
- S_ENDPGM 0, implicit %1
-
-...
-
-# FIXME: Probably should not be legal
----
-name: extract_sgpr_s16_from_v4s16_offset0
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0_sgpr1
- ; CHECK-LABEL: name: extract_sgpr_s16_from_v4s16_offset0
- ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; CHECK: S_ENDPGM 0, implicit [[COPY1]]
- %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
- %1:sgpr(s16) = G_EXTRACT %0, 0
- S_ENDPGM 0, implicit %1
-
-...
-
-# FIXME: Probably should not be legal
----
-name: extract_sgpr_s16_from_v4s16_offset32
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0_sgpr1
- ; CHECK-LABEL: name: extract_sgpr_s16_from_v4s16_offset32
- ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; CHECK: S_ENDPGM 0, implicit [[COPY1]]
- %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
- %1:sgpr(s16) = G_EXTRACT %0, 32
- S_ENDPGM 0, implicit %1
-
-...
-
-# FIXME: Probably should not be legal
----
-name: extract_sgpr_s16_from_v6s16_offset32
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0_sgpr1_sgpr2
- ; CHECK-LABEL: name: extract_sgpr_s16_from_v6s16_offset32
- ; CHECK: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2
- ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; CHECK: S_ENDPGM 0, implicit [[COPY1]]
- %0:sgpr(<6 x s16>) = COPY $sgpr0_sgpr1_sgpr2
- %1:sgpr(s16) = G_EXTRACT %0, 32
- S_ENDPGM 0, implicit %1
-
-...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir
index 4b6628472c8bb..7d8e42e39fac1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir
@@ -32,8 +32,7 @@ body: |
%7:sgpr(s64) = G_CONSTANT i64 36
%8:sgpr(p4) = G_PTR_ADD %2, %7(s64)
%9:sgpr(<2 x s64>) = G_LOAD %8(p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4)
- %10:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 0
- %13:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 64
+ %10:sgpr(s64), %13:sgpr(s64) = G_UNMERGE_VALUES %9(<2 x s64>)
%15:sgpr(p1) = G_INTTOPTR %13(s64)
%18:sgpr(s64) = G_LOAD %15(p1) :: (load (s64), addrspace 1)
%19:sgpr(s64) = G_FCONSTANT double -0.000000e+00
@@ -82,8 +81,7 @@ body: |
%7:sgpr(s64) = G_CONSTANT i64 36
%8:sgpr(p4) = G_PTR_ADD %2, %7(s64)
%9:sgpr(<2 x s64>) = G_LOAD %8(p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4)
- %10:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 0
- %13:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 64
+ %10:sgpr(s64), %13:sgpr(s64) = G_UNMERGE_VALUES %9(<2 x s64>)
%15:sgpr(p1) = G_INTTOPTR %13(s64)
%18:sgpr(s64) = G_LOAD %15(p1) :: (load (s64), addrspace 1)
%19:sgpr(s64) = G_FABS %18
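(Editor's sketch, not part of the commit.) Both fract.f64.mir hunks above replace two offset-based G_EXTRACTs of a <2 x s64> load with a single G_UNMERGE_VALUES that defines both halves at once, which is the form the reworked artifact combiner can fold. A minimal builder-level equivalent, with `B` and `Vec` assumed inputs:

#include <utility>

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

using namespace llvm;

// Sketch: instead of G_EXTRACT at bit offsets 0 and 64, define both s64
// halves of a <2 x s64> value with one G_UNMERGE_VALUES.
static std::pair<Register, Register> splitV2S64(MachineIRBuilder &B,
                                                Register Vec) {
  auto Unmerge = B.buildUnmerge(LLT::scalar(64), Vec);
  return {Unmerge.getReg(0), Unmerge.getReg(1)};
}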
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
index 10b9280837a7f..57dbe204ecc98 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
@@ -566,42 +566,3 @@ body: |
%2:vgpr(s256) = G_INSERT %0, %1, 128
S_ENDPGM 0, implicit %2
...
-
----
-name: insert_sgpr_v2s16_to_v4s16_offset0
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0_sgpr1, $sgpr2
- ; CHECK-LABEL: name: insert_sgpr_v2s16_to_v4s16_offset0
- ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0
- ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
- %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
- %1:sgpr(<2 x s16>) = COPY $sgpr2
- %2:sgpr(<4 x s16>) = G_INSERT %0, %1, 0
- S_ENDPGM 0, implicit %2
-
-...
-
----
-name: insert_sgpr_v2s16_to_v4s16_offset32
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- liveins: $sgpr0_sgpr1, $sgpr2
- ; CHECK-LABEL: name: insert_sgpr_v2s16_to_v4s16_offset32
- ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1
- ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
- %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
- %1:sgpr(<2 x s16>) = COPY $sgpr2
- %2:sgpr(<4 x s16>) = G_INSERT %0, %1, 32
- S_ENDPGM 0, implicit %2
-...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir
index 150b341561f97..1720519b89e2e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir
@@ -1,7 +1,7 @@
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s
-# ERR: remark: <unknown>:0:0: cannot select: %3:sgpr(<4 x s16>) = G_INSERT %0:sgpr, %2:sgpr(s16), 0 (in function: insert_sgpr_s16_to_v4s16_offset0)
-# ERR-NEXT: remark: <unknown>:0:0: cannot select: %2:sgpr(<16 x s32>) = G_INSERT %0:sgpr, %1:sgpr(<8 x s32>), 0 (in function: insert_sgpr_v8s32_to_v16s32_offset0)
+# ERR: remark: <unknown>:0:0: instruction is not legal: %3:sgpr(<4 x s16>) = G_INSERT %0:sgpr, %2:sgpr(s16), 0 (in function: insert_sgpr_s16_to_v4s16_offset0)
+# ERR-NEXT: <unknown>:0:0: instruction is not legal: %2:sgpr(<16 x s32>) = G_INSERT %0:sgpr, %1:sgpr(<8 x s32>), 0 (in function: insert_sgpr_v8s32_to_v16s32_offset0)
# ERR-NOT: remark
# FIXME: This 16-bit insert source should not be legal and this test
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 17d8f497cbcdb..47461bf5e17d3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -2078,11 +2078,11 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 {
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[DEF1]](<2 x s16>)
- ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
+ ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
+ ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call <3 x i16> @external_v3i16_func_void()
store volatile <3 x i16> %val, <3 x i16> addrspace(1)* undef
@@ -2252,11 +2252,11 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 {
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[DEF1]](<2 x s16>)
- ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
+ ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
+ ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call <3 x half> @external_v3f16_func_void()
store volatile <3 x half> %val, <3 x half> addrspace(1)* undef
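(Editor's sketch, not part of the commit.) The two call-return hunks above show the incoming direction of the same rework: the two <2 x s16> return registers are concatenated to <4 x s16>, unmerged to scalars, and only the first three elements are rebuilt into the <3 x s16> result, i.e. the trailing pad element is simply dropped. The patch adds buildDeleteTrailingVectorElements for this; the wrapper below is an illustrative hand-rolled version, with `B`, `Lo`, and `Hi` assumed inputs.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

using namespace llvm;

// Sketch: rebuild a <3 x s16> value from two <2 x s16> return registers by
// concatenating, unmerging to scalars, and keeping only the leading three
// elements.
static Register buildV3S16FromRegs(MachineIRBuilder &B, Register Lo,
                                   Register Hi) {
  auto Concat = B.buildConcatVectors(LLT::fixed_vector(4, 16), {Lo, Hi});
  auto Unmerge = B.buildUnmerge(LLT::scalar(16), Concat);
  // Drop the trailing pad element; only three elements carry the value.
  return B.buildBuildVector(LLT::fixed_vector(3, 16),
                            {Unmerge.getReg(0), Unmerge.getReg(1),
                             Unmerge.getReg(2)})
      .getReg(0);
}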
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index d81a40bfe6d04..419c8f920f482 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -2101,11 +2101,12 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
- ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16)
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>)
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -2161,11 +2162,12 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
- ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16)
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>)
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -2339,12 +2341,13 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 {
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[DEF1]](<5 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
- ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<5 x s16>)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF1]](s16)
+ ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr2 = COPY [[UV7]](<2 x s16>)
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -2400,13 +2403,14 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 {
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<7 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[DEF1]](<7 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<14 x s16>)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
- ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
- ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<7 x s16>)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF1]](s16)
+ ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s16>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr2 = COPY [[UV9]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr3 = COPY [[UV10]](<2 x s16>)
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -2462,44 +2466,45 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<63 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<126 x s16>) = G_CONCAT_VECTORS [[LOAD]](<63 x s16>), [[DEF1]](<63 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>), [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>), [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>), [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>), [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<126 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<63 x s16>)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[DEF1]](s16)
+ ; CHECK-NEXT: [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<64 x s16>)
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
- ; CHECK-NEXT: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
- ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
- ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>)
- ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>)
- ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>)
- ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>)
- ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>)
- ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](<2 x s16>)
- ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](<2 x s16>)
- ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](<2 x s16>)
- ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](<2 x s16>)
- ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](<2 x s16>)
- ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](<2 x s16>)
- ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](<2 x s16>)
- ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](<2 x s16>)
- ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](<2 x s16>)
- ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](<2 x s16>)
- ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](<2 x s16>)
- ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](<2 x s16>)
- ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](<2 x s16>)
- ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](<2 x s16>)
- ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](<2 x s16>)
- ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](<2 x s16>)
- ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](<2 x s16>)
- ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](<2 x s16>)
- ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](<2 x s16>)
- ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](<2 x s16>)
- ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](<2 x s16>)
- ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](<2 x s16>)
- ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](<2 x s16>)
+ ; CHECK-NEXT: G_STORE [[UV94]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV63]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV64]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr2 = COPY [[UV65]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr3 = COPY [[UV66]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr4 = COPY [[UV67]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr5 = COPY [[UV68]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr6 = COPY [[UV69]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr7 = COPY [[UV70]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr8 = COPY [[UV71]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr9 = COPY [[UV72]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr10 = COPY [[UV73]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr11 = COPY [[UV74]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr12 = COPY [[UV75]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr13 = COPY [[UV76]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr14 = COPY [[UV77]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr15 = COPY [[UV78]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr16 = COPY [[UV79]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr17 = COPY [[UV80]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr18 = COPY [[UV81]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr19 = COPY [[UV82]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr20 = COPY [[UV83]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr21 = COPY [[UV84]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr22 = COPY [[UV85]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr23 = COPY [[UV86]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr24 = COPY [[UV87]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr25 = COPY [[UV88]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr26 = COPY [[UV89]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr27 = COPY [[UV90]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr28 = COPY [[UV91]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr29 = COPY [[UV92]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr30 = COPY [[UV93]](<2 x s16>)
; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -2555,47 +2560,48 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<65 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[LOAD]](<65 x s16>), [[DEF1]](<65 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>), [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>), [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>), [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>), [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<65 x s16>)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<66 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16), [[DEF1]](s16)
+ ; CHECK-NEXT: [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>), [[UV95:%[0-9]+]]:_(<2 x s16>), [[UV96:%[0-9]+]]:_(<2 x s16>), [[UV97:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<66 x s16>)
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
- ; CHECK-NEXT: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
+ ; CHECK-NEXT: G_STORE [[UV96]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
- ; CHECK-NEXT: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
- ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
- ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>)
- ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>)
- ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>)
- ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>)
- ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>)
- ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](<2 x s16>)
- ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](<2 x s16>)
- ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](<2 x s16>)
- ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](<2 x s16>)
- ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](<2 x s16>)
- ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](<2 x s16>)
- ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](<2 x s16>)
- ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](<2 x s16>)
- ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](<2 x s16>)
- ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](<2 x s16>)
- ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](<2 x s16>)
- ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](<2 x s16>)
- ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](<2 x s16>)
- ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](<2 x s16>)
- ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](<2 x s16>)
- ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](<2 x s16>)
- ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](<2 x s16>)
- ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](<2 x s16>)
- ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](<2 x s16>)
- ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](<2 x s16>)
- ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](<2 x s16>)
- ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](<2 x s16>)
- ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](<2 x s16>)
+ ; CHECK-NEXT: G_STORE [[UV97]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV65]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV66]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr2 = COPY [[UV67]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr3 = COPY [[UV68]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr4 = COPY [[UV69]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr5 = COPY [[UV70]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr6 = COPY [[UV71]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr7 = COPY [[UV72]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr8 = COPY [[UV73]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr9 = COPY [[UV74]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr10 = COPY [[UV75]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr11 = COPY [[UV76]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr12 = COPY [[UV77]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr13 = COPY [[UV78]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr14 = COPY [[UV79]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr15 = COPY [[UV80]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr16 = COPY [[UV81]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr17 = COPY [[UV82]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr18 = COPY [[UV83]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr19 = COPY [[UV84]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr20 = COPY [[UV85]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr21 = COPY [[UV86]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr22 = COPY [[UV87]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr23 = COPY [[UV88]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr24 = COPY [[UV89]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr25 = COPY [[UV90]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr26 = COPY [[UV91]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr27 = COPY [[UV92]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr28 = COPY [[UV93]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr29 = COPY [[UV94]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr30 = COPY [[UV95]](<2 x s16>)
; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
index 70a41f9dafaaa..f29e6456d090f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
@@ -1230,12 +1230,12 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY3]]
store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef
@@ -1267,12 +1267,12 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 {
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK-NEXT: G_STORE [[UV]](<5 x s16>), [[DEF1]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s16>), [[DEF]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]]
store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef
@@ -1363,12 +1363,12 @@ define void @void_func_v65i16(<65 x i16> %arg0) #0 {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, align 16, addrspace 5)
; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, addrspace 5)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<66 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16), [[UV65:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<66 x s16>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<65 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16)
; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK-NEXT: G_STORE [[UV]](<65 x s16>), [[DEF1]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<65 x s16>), [[DEF]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1)
; CHECK-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY32]]
store <65 x i16> %arg0, <65 x i16> addrspace(1)* undef
@@ -1680,12 +1680,12 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY3]]
store <3 x half> %arg0, <3 x half> addrspace(1)* undef
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index d6da4fca21980..7a0176d6233fe 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -1383,9 +1383,9 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>)
+ ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
@@ -1396,11 +1396,12 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<3 x s16>), [[DEF1]](<3 x s16>)
- ; GCN-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
- ; GCN-NEXT: $vgpr0 = COPY [[UV2]](<2 x s16>)
- ; GCN-NEXT: $vgpr1 = COPY [[UV3]](<2 x s16>)
+ ; GCN-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s16>)
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF]](s16)
+ ; GCN-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s16>)
+ ; GCN-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>)
+ ; GCN-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>)
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
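On the return path the direction reverses: the <3 x s16> value is unmerged, a
single G_IMPLICIT_DEF s16 is appended, and a <4 x s16> is rebuilt before being
split into the two <2 x s16> return registers, as the new checks above show.
A sketch of the pad-with-undef counterpart, again illustrative only:

    // Pad a vector to a wider type by appending undef elements; a single
    // G_IMPLICIT_DEF def can be reused for every padding lane.
    static MachineInstrBuilder
    padWithUndefSketch(MachineIRBuilder &B, LLT ResTy, Register Src) {
      LLT SrcTy = B.getMRI()->getType(Src);
      LLT EltTy = ResTy.getElementType();
      assert(SrcTy.getElementType() == EltTy &&
             ResTy.getNumElements() > SrcTy.getNumElements());
      auto Unmerge = B.buildUnmerge(EltTy, Src);
      SmallVector<Register, 8> Elts;
      for (unsigned I = 0; I != SrcTy.getNumElements(); ++I)
        Elts.push_back(Unmerge.getReg(I));
      Register Undef = B.buildUndef(EltTy).getReg(0);
      Elts.append(ResTy.getNumElements() - SrcTy.getNumElements(), Undef);
      return B.buildBuildVector(ResTy, Elts);        // e.g. <4 x s16>
    }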
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
index 6d67456bf6f54..9acec3f568762 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -allow-ginsert-as-artifact=0 -global-isel-abort=0 %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s
---
name: test_and_s32
@@ -302,18 +302,14 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[DEF1]](<2 x s32>)
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<3 x s32>), [[UV7:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV6]](<3 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV5]]
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[UV7]](s32), [[AND1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
%2:_(<3 x s32>) = G_AND %0, %1
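test_and_v3s32 shows the leftover behavior of the reworked fewerElementsVector
path: <3 x s32> becomes one <2 x s32> G_AND plus a plain s32 G_AND for the
tail element, with no undef padding, and the results are recombined
element-wise. A simplified sketch of that strategy, hard-coding G_AND where
the real helper is generic over opcodes:

    // Split a vector binary op into NarrowTy pieces plus a smaller
    // leftover op, mirroring the checked MIR above.
    static void splitWithLeftoverSketch(MachineIRBuilder &B,
                                        MachineRegisterInfo &MRI,
                                        Register Dst, Register Src0,
                                        Register Src1, LLT NarrowTy) {
      LLT DstTy = MRI.getType(Dst);
      LLT EltTy = DstTy.getElementType();
      unsigned NumElts = DstTy.getNumElements();
      unsigned NarrowElts = NarrowTy.getNumElements();

      // Unmerge both inputs all the way down to scalar elements.
      auto Elts0 = B.buildUnmerge(EltTy, Src0);
      auto Elts1 = B.buildUnmerge(EltTy, Src1);

      SmallVector<Register, 8> ResultElts;
      unsigned I = 0;
      for (; I + NarrowElts <= NumElts; I += NarrowElts) {
        // Rebuild one NarrowTy piece per input and combine them.
        SmallVector<Register, 4> P0, P1;
        for (unsigned J = 0; J != NarrowElts; ++J) {
          P0.push_back(Elts0.getReg(I + J));
          P1.push_back(Elts1.getReg(I + J));
        }
        auto Piece = B.buildAnd(NarrowTy, B.buildBuildVector(NarrowTy, P0),
                                B.buildBuildVector(NarrowTy, P1));
        auto PieceElts = B.buildUnmerge(EltTy, Piece);
        for (unsigned J = 0; J != NarrowElts; ++J)
          ResultElts.push_back(PieceElts.getReg(J));
      }
      // Leftover tail: scalar ops, smaller than NarrowTy, no padding.
      for (; I != NumElts; ++I)
        ResultElts.push_back(
            B.buildAnd(EltTy, Elts0.getReg(I), Elts1.getReg(I)).getReg(0));
      B.buildBuildVector(Dst, ResultElts);
    }

The leftover can use fewer registers than padding the tail to a full NarrowTy
piece would; a target that prefers uniform pieces widens the vector first.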
@@ -350,23 +346,20 @@ body: |
; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32)
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR4]]
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[AND2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>)
- ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<5 x s32>), [[UV11:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s32>)
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[UV10]](<5 x s32>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV9]]
+ ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>)
+ ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](<2 x s32>)
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[AND2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>)
%0:_(<5 x s32>) = G_IMPLICIT_DEF
%1:_(<5 x s32>) = G_IMPLICIT_DEF
%2:_(<5 x s32>) = G_AND %0, %1
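For <5 x s32> the result is put back into the <8 x s32> register class by
reusing lanes from an unmerged G_IMPLICIT_DEF instead of a G_INSERT, which is
why the -allow-ginsert-as-artifact=0 escape hatch could be dropped from the
RUN line above. Opcodes that should keep the old split-without-leftover shape
do more elements first via clampMaxNumElementsStrict; a hypothetical rule
sketch with a made-up legal set:

    const LLT S32 = LLT::scalar(32);
    const LLT S64 = LLT::scalar(64);
    const LLT V2S32 = LLT::fixed_vector(2, 32);
    getActionDefinitionsBuilder(G_AND)
        .legalFor({S32, S64, V2S32})           // made-up legal types
        .clampMaxNumElementsStrict(0, S32, 2); // pad first, then split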
@@ -421,40 +414,65 @@ body: |
; CHECK-LABEL: name: test_and_v3s16
; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT]], [[INSERT1]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND6]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C1]]
+ ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C1]]
+ ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]]
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32)
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]]
+ ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
+ ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]]
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]]
+ ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
%2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0
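The <3 x s16> case looks noisier only because 16-bit vectors on this target
are legalized through 32-bit words: each bitcast / G_LSHR 16 / G_AND 65535 /
G_SHL / G_OR cluster above repacks two 16-bit lanes inside the s32 that backs
a <2 x s16> register. In scalar terms:

    #include <cstdint>

    // One repack cluster: two 16-bit lanes packed into a 32-bit word.
    static uint32_t packTwoLanes(uint32_t Lo, uint32_t Hi) {
      return (Lo & 0xffff) | ((Hi & 0xffff) << 16);  // G_AND/G_SHL/G_OR
    }
    // Extracting the high lane back out.
    static uint32_t highLane(uint32_t Word) {
      return Word >> 16;                             // G_LSHR by 16
    }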
@@ -490,54 +508,102 @@ body: |
; CHECK-LABEL: name: test_and_v5s16
; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0
- ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>)
- ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>)
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0
- ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT2]], [[INSERT3]]
- ; CHECK-NEXT: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0
- ; CHECK-NEXT: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT4]], [[INSERT5]]
- ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
- ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND1]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL2]]
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
- ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
- ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>), [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>)
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV20]](<5 x s16>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
+ ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND6]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
+ ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL5]]
+ ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>)
+ ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
+ ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL6]]
+ ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32)
+ ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL7]]
+ ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>)
+ ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]]
+ ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND11]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32)
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+ ; CHECK-NEXT: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST18:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST18]], [[C]](s32)
+ ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]]
+ ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]]
+ ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+ ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL8]]
+ ; CHECK-NEXT: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+ ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]]
+ ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]]
+ ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32)
+ ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL9]]
+ ; CHECK-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+ ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]]
+ ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]]
+ ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
+ ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL10]]
+ ; CHECK-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+ ; CHECK-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST18]], [[C1]]
+ ; CHECK-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]]
+ ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C]](s32)
+ ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL11]]
+ ; CHECK-NEXT: [[BITCAST22:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST19]](<2 x s16>), [[BITCAST20]](<2 x s16>), [[BITCAST21]](<2 x s16>), [[BITCAST22]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>)
%0:_(<5 x s16>) = G_IMPLICIT_DEF
%1:_(<5 x s16>) = G_IMPLICIT_DEF
%2:_(<5 x s16>) = G_AND %0, %1
@@ -552,34 +618,15 @@ body: |
bb.0:
; CHECK-LABEL: name: test_and_v3s8
- ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF1]](<3 x s8>), 0
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>)
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
- ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[ANYEXT1]]
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[AND]](s32)
- ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
- ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[ANYEXT3]]
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[AND1]](s32)
- ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
- ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[ANYEXT5]]
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[AND2]](s32)
- ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
- ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[ANYEXT7]]
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[AND3]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>)
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>)
- ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV4]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV5]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV6]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s8>) = G_IMPLICIT_DEF
%1:_(<3 x s8>) = G_IMPLICIT_DEF
%2:_(<3 x s8>) = G_AND %0, %1
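test_and_v3s8 now legalizes by widening each s8 element to s32 and recombining
with a plain G_BUILD_VECTOR; no <12 x s8> concat or G_INSERT survives in the
output. The two new MachineIRBuilder entry points compose in the obvious way;
hypothetical usage, assuming a builder B and a <3 x s8> value Narrow:

    const LLT V3S8 = LLT::fixed_vector(3, 8);
    const LLT V4S8 = LLT::fixed_vector(4, 8);
    // Pad up to the number of elements the legal set wants...
    Register Wide =
        B.buildPadVectorWithUndefElements(V4S8, Narrow).getReg(0);
    // ...and drop the pad lane again on the way back down.
    Register Back =
        B.buildDeleteTrailingVectorElements(V3S8, Wide).getReg(0);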
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir
index ffbf3f55dfb27..c6f0a6dc349c8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir
@@ -114,10 +114,7 @@ body: |
; CHECK-LABEL: name: test_anyext_v3s16_to_v3s32
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
index 5c982bd92352e..b1165aa5cd486 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
@@ -1,8 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=SI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=VI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9PLUS %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9PLUS %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9PLUS %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9PLUS %s
---
name: test_ashr_s32_s32
@@ -532,43 +532,40 @@ body: |
; SI-LABEL: name: test_ashr_v3s64_v3s32
; SI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>)
- ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
- ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV3]](s32)
- ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV4]](s32)
- ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV5]](s32)
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64)
+ ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32)
+ ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32)
+ ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32)
; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64)
+ ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; VI-LABEL: name: test_ashr_v3s64_v3s32
; VI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>)
- ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
- ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV3]](s32)
- ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV4]](s32)
- ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV5]](s32)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64)
+ ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32)
+ ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32)
+ ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32)
; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64)
+ ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX9PLUS-LABEL: name: test_ashr_v3s64_v3s32
; GFX9PLUS: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- ; GFX9PLUS-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0
+ ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10
- ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>)
- ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
- ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV3]](s32)
- ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV4]](s32)
- ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV5]](s32)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64)
+ ; GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32)
+ ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32)
+ ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32)
; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX9PLUS-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64)
+ ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
%1:_(<3 x s64>) = G_EXTRACT %0, 0
%2:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10
@@ -798,32 +795,33 @@ body: |
; GFX9PLUS-LABEL: name: test_ashr_v3s16_v3s16
; GFX9PLUS: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
- ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; GFX9PLUS-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; GFX9PLUS-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; GFX9PLUS-NEXT: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32
- ; GFX9PLUS-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
- ; GFX9PLUS-NEXT: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0
- ; GFX9PLUS-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
- ; GFX9PLUS-NEXT: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
- ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[EXTRACT]], [[EXTRACT2]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[EXTRACT1]], [[EXTRACT3]](s16)
- ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
+ ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+ ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16)
+ ; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX9PLUS-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+ ; GFX9PLUS-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32)
; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST1]](s32)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32)
- ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST5]](s32)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST6]](s32)
+ ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
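These rewrites pay off because the resulting chains actually combine away:
with -global-isel-abort=0 gone from the legalize-ashr RUN lines, any
legalization artifact left behind would now abort the pipeline. The enabling
fold is mechanical; a simplified sketch, not the in-tree artifact combiner:

    // G_UNMERGE_VALUES fed by a G_BUILD_VECTOR with one source per
    // result just forwards the scalar sources.
    static bool foldUnmergeOfBuildVector(MachineInstr &MI,
                                         MachineRegisterInfo &MRI) {
      assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
      unsigned NumDefs = MI.getNumOperands() - 1; // last operand is the source
      MachineInstr *Def = MRI.getVRegDef(MI.getOperand(NumDefs).getReg());
      if (!Def || Def->getOpcode() != TargetOpcode::G_BUILD_VECTOR ||
          Def->getNumOperands() - 1 != NumDefs)     // one element per def
        return false;
      if (MRI.getType(MI.getOperand(0).getReg()) !=
          MRI.getType(Def->getOperand(1).getReg())) // element types must match
        return false;
      for (unsigned I = 0; I != NumDefs; ++I)
        MRI.replaceRegWith(MI.getOperand(I).getReg(),
                           Def->getOperand(I + 1).getReg());
      MI.eraseFromParent();
      return true;
    }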
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
index ab67960768106..c7c4e07342000 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
@@ -511,10 +511,9 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16)
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]]
@@ -549,19 +548,18 @@ body: |
; CHECK-LABEL: name: test_bitcast_v4s8_to_v2s16
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32)
- ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]]
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32)
- ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]]
- ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
- ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32)
- ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]]
- ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
- ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32)
- ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]]
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16)
+ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]]
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16)
+ ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]]
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16)
+ ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]]
@@ -602,14 +600,13 @@ body: |
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C3]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C3]](s32)
- ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C4]](s32)
- ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32)
- ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32)
+ ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16)
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]]
@@ -660,31 +657,30 @@ body: |
; CHECK-LABEL: name: test_bitcast_v8s4_to_v2s16
; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>)
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32)
- ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]]
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32)
- ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]]
- ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
- ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32)
- ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]]
- ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
- ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32)
- ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]]
- ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32)
- ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV12]](s32)
- ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[TRUNC9]]
- ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32)
- ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV13]](s32)
- ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[TRUNC11]]
- ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32)
- ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV14]](s32)
- ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[TRUNC13]]
- ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32)
- ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV15]](s32)
- ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[TRUNC15]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]]
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16)
+ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]]
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16)
+ ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]]
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16)
+ ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]]
+ ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16)
+ ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[COPY5]], [[TRUNC4]]
+ ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16)
+ ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[COPY6]], [[TRUNC5]]
+ ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16)
+ ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[COPY7]], [[TRUNC6]]
+ ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16)
+ ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[COPY8]], [[TRUNC7]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]]
@@ -757,12 +753,11 @@ body: |
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
@@ -858,6 +853,10 @@ body: |
; CHECK-LABEL: name: test_bitcast_v8s8_to_v2s32
; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32)
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
@@ -872,15 +871,18 @@ body: |
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV12]], [[C]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV13]], [[C]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]]
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C]]
; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV14]], [[C]]
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C]]
; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV15]], [[C]]
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C]]
; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
@@ -1106,6 +1108,22 @@ body: |
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR]](s32)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST1]](s32)
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR1]](s32)
+ ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST2]](s32)
+ ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR2]](s32)
+ ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST3]](s32)
+ ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR3]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
@@ -1135,15 +1153,14 @@ body: |
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]]
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<16 x s16>)
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s4) = COPY [[TRUNC]](s4)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s4) = COPY [[TRUNC1]](s4)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s4) = COPY [[TRUNC2]](s4)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s4) = COPY [[TRUNC3]](s4)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s4) = COPY [[TRUNC4]](s4)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s4) = COPY [[TRUNC5]](s4)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s4) = COPY [[TRUNC6]](s4)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s4) = COPY [[TRUNC7]](s4)
; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]]
; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
@@ -1367,25 +1384,24 @@ body: |
; CHECK-LABEL: name: test_bitcast_v6s8_to_v3s16
; CHECK: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<6 x s32>)
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<6 x s32>)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32)
- ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]]
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32)
- ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]]
- ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
- ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32)
- ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]]
- ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
- ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32)
- ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]]
- ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32)
- ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32)
- ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[TRUNC9]]
- ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32)
- ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32)
- ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[TRUNC11]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]]
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16)
+ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]]
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16)
+ ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]]
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16)
+ ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]]
+ ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16)
+ ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[COPY5]], [[TRUNC4]]
+ ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16)
+ ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[COPY6]], [[TRUNC5]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]]
@@ -1433,15 +1449,14 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16)
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
- ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
- ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32)
- ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32)
- ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC3]]
- ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]]
- ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]]
+ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]]
+ ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[ADD]], [[C]](s16)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16)
@@ -1562,30 +1577,38 @@ body: |
; CHECK-LABEL: name: test_bitcast_v16s8_to_v2s64
; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32)
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32)
+ ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32)
+ ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32)
+ ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32)
+ ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
+ ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]]
+ ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]]
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]]
+ ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]]
+ ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]]
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
- ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]]
- ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]]
+ ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C]]
+ ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C]]
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
- ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32)
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]]
- ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32)
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]]
+ ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32)
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]]
+ ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32)
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]]
; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
@@ -1598,29 +1621,36 @@ body: |
; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32)
; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
- ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
- ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV24]](s32)
- ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]]
- ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV25]](s32)
- ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8)
+ ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32)
+ ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C]]
+ ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32)
+ ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C]]
; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16)
; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
- ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV26]](s32)
- ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]]
- ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV27]](s32)
- ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]]
+ ; CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32)
+ ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C]]
+ ; CHECK-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32)
+ ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C]]
; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C1]](s16)
; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
- ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV28]](s32)
- ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C]]
- ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV29]](s32)
- ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C]]
+ ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[UV12]](s32)
+ ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C]]
+ ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[UV13]](s32)
+ ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C]]
; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C1]](s16)
; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
- ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV30]](s32)
- ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]]
- ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV31]](s32)
- ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]]
+ ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[UV14]](s32)
+ ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C]]
+ ; CHECK-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[UV15]](s32)
+ ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C]]
; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C1]](s16)
; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
@@ -1745,6 +1775,10 @@ body: |
; CHECK-LABEL: name: test_bitcast_v16s8_to_v4s32
; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32)
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
@@ -1759,37 +1793,66 @@ body: |
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
- ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV20]], [[C]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV21]], [[C]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8)
+ ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32)
+ ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32)
+ ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32)
+ ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]]
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C]]
; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV22]], [[C]]
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C]]
; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32)
; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV23]], [[C]]
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C]]
; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32)
; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
- ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
- ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV40]], [[C]]
- ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV41]], [[C]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8)
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8)
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8)
+ ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV8]](s32)
+ ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[UV9]](s32)
+ ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[UV10]](s32)
+ ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[UV11]](s32)
+ ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C]]
+ ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV9]], [[C]]
; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
- ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV42]], [[C]]
+ ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV10]], [[C]]
; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32)
; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
- ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV43]], [[C]]
+ ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV11]], [[C]]
; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32)
; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
- ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
- ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV60]], [[C]]
- ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV61]], [[C]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8)
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8)
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8)
+ ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8)
+ ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8)
+ ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8)
+ ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8)
+ ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8)
+ ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8)
+ ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV12]], [[C]]
+ ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV13]], [[C]]
; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32)
; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
- ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[UV62]], [[C]]
+ ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[UV14]], [[C]]
; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32)
; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
- ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV63]], [[C]]
+ ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV15]], [[C]]
; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32)
; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
@@ -1885,61 +1948,124 @@ body: |
; CHECK-LABEL: name: test_bitcast_v16s8_to_v8s16
; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]]
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV18]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV19]](s32)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8)
+ ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32)
+ ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32)
+ ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]]
+ ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]]
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
- ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
- ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV36]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]]
- ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV37]](s32)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8)
+ ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32)
+ ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32)
+ ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]]
+ ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]]
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
- ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
- ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV54]](s32)
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]]
- ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV55]](s32)
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8)
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8)
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8)
+ ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32)
+ ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32)
+ ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32)
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]]
+ ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32)
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]]
; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
- ; CHECK-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32), [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
- ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV72]](s32)
- ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]]
- ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV73]](s32)
- ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8)
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8)
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8)
+ ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8)
+ ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8)
+ ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s8) = G_TRUNC [[UV8]](s32)
+ ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[UV9]](s32)
+ ; CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32)
+ ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C]]
+ ; CHECK-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32)
+ ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C]]
; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16)
; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]]
- ; CHECK-NEXT: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
- ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV90]](s32)
- ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]]
- ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV91]](s32)
- ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]]
+ ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
+ ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8)
+ ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8)
+ ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8)
+ ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8)
+ ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8)
+ ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8)
+ ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8)
+ ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8)
+ ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8)
+ ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(s8) = G_TRUNC [[UV10]](s32)
+ ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[UV11]](s32)
+ ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32)
+ ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C]]
+ ; CHECK-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32)
+ ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C]]
; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C1]](s16)
; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]]
- ; CHECK-NEXT: [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32), [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
- ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV108]](s32)
- ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C]]
- ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV109]](s32)
- ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C]]
+ ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
+ ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8)
+ ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8)
+ ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8)
+ ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8)
+ ; CHECK-NEXT: [[COPY36:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8)
+ ; CHECK-NEXT: [[COPY37:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8)
+ ; CHECK-NEXT: [[COPY38:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8)
+ ; CHECK-NEXT: [[COPY39:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8)
+ ; CHECK-NEXT: [[COPY40:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8)
+ ; CHECK-NEXT: [[COPY41:%[0-9]+]]:_(s8) = COPY [[TRUNC20]](s8)
+ ; CHECK-NEXT: [[COPY42:%[0-9]+]]:_(s8) = COPY [[TRUNC21]](s8)
+ ; CHECK-NEXT: [[TRUNC24:%[0-9]+]]:_(s8) = G_TRUNC [[UV12]](s32)
+ ; CHECK-NEXT: [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[UV13]](s32)
+ ; CHECK-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[UV12]](s32)
+ ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C]]
+ ; CHECK-NEXT: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[UV13]](s32)
+ ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C]]
; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C1]](s16)
; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]]
- ; CHECK-NEXT: [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
- ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV126]](s32)
- ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]]
- ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV127]](s32)
- ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]]
+ ; CHECK-NEXT: [[COPY43:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
+ ; CHECK-NEXT: [[COPY44:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8)
+ ; CHECK-NEXT: [[COPY45:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8)
+ ; CHECK-NEXT: [[COPY46:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8)
+ ; CHECK-NEXT: [[COPY47:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8)
+ ; CHECK-NEXT: [[COPY48:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8)
+ ; CHECK-NEXT: [[COPY49:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8)
+ ; CHECK-NEXT: [[COPY50:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8)
+ ; CHECK-NEXT: [[COPY51:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8)
+ ; CHECK-NEXT: [[COPY52:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8)
+ ; CHECK-NEXT: [[COPY53:%[0-9]+]]:_(s8) = COPY [[TRUNC20]](s8)
+ ; CHECK-NEXT: [[COPY54:%[0-9]+]]:_(s8) = COPY [[TRUNC21]](s8)
+ ; CHECK-NEXT: [[COPY55:%[0-9]+]]:_(s8) = COPY [[TRUNC24]](s8)
+ ; CHECK-NEXT: [[COPY56:%[0-9]+]]:_(s8) = COPY [[TRUNC25]](s8)
+ ; CHECK-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[UV14]](s32)
+ ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C]]
+ ; CHECK-NEXT: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[UV15]](s32)
+ ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C]]
; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C1]](s16)
; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]]
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
@@ -2210,21 +2336,20 @@ body: |
; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32)
; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>)
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32)
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32)
- ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<12 x s16>)
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<12 x s16>)
+ ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32)
; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32)
- ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32)
; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32)
@@ -2253,20 +2378,19 @@ body: |
; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32)
; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
- ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<12 x s16>)
- ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV20]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32)
; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32)
- ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
; CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32)
; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32)
; CHECK-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32)
- ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV22]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32)
; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32)
; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32)
- ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV23]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>)
; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST11]](s32)
; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32)
; CHECK-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32)
@@ -2388,35 +2512,50 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
+ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
+ ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST]](s32)
+ ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8)
+ ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST1]](s32)
+ ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s32)
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]]
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]]
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
- ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8)
+ ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST2]](s32)
+ ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR2]](s32)
; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]]
; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]]
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
- ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>)
- ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8)
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8)
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8)
; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]]
; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]]
; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
index 55dfdbcc9ec03..c624320344b17 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
@@ -9,8 +9,9 @@ body: |
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: extract_vector_elt_0_v2i32
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CONSTANT i32 0
%2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -24,8 +25,9 @@ body: |
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: extract_vector_elt_1_v2i32
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CONSTANT i32 1
%2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -39,8 +41,9 @@ body: |
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: extract_vector_elt_2_v2i32
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CONSTANT i32 1
%2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -54,8 +57,9 @@ body: |
liveins: $vgpr0_vgpr1_vgpr2
; CHECK-LABEL: name: extract_vector_elt_0_v3i32
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(s32) = G_CONSTANT i32 0
%2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -69,8 +73,9 @@ body: |
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-LABEL: name: extract_vector_elt_0_v4i32
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<4 x s32>), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = G_CONSTANT i32 0
%2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -205,8 +210,9 @@ body: |
; CHECK-LABEL: name: extract_vector_elt_0_v2i8_i32
; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](<2 x s32>), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
%0:_(<2 x s8>) = G_IMPLICIT_DEF
%1:_(s32) = G_CONSTANT i32 0
%2:_(s8) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -241,8 +247,9 @@ body: |
; CHECK-LABEL: name: extract_vector_elt_0_v2i1_i32
; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](<2 x s32>), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
%0:_(<2 x s1>) = G_IMPLICIT_DEF
%1:_(s32) = G_CONSTANT i32 0
%2:_(s1) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -258,8 +265,9 @@ body: |
; CHECK-LABEL: name: extract_vector_elt_0_v2i1_i1
; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](<2 x s32>), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
%0:_(<2 x s1>) = G_IMPLICIT_DEF
%1:_(s1) = G_CONSTANT i1 false
%2:_(s1) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -301,11 +309,8 @@ body: |
; CHECK-LABEL: name: extract_vector_elt_v2s8_constidx_0_i32
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32)
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<2 x s32>), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s16) = G_TRUNC %0
@@ -328,9 +333,8 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32)
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<2 x s32>), 32
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s16) = G_TRUNC %0
@@ -1017,8 +1021,9 @@ body: |
; CHECK-LABEL: name: extract_vector_elt_v3s16_idx0_i32
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(s32) = G_CONSTANT i32 0
%2:_(<3 x s16>) = G_TRUNC %0
@@ -1036,8 +1041,9 @@ body: |
; CHECK-LABEL: name: extract_vector_elt_v3s16_idx1_i32
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 32
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(s32) = G_CONSTANT i32 1
%2:_(<3 x s16>) = G_TRUNC %0
@@ -1055,8 +1061,9 @@ body: |
; CHECK-LABEL: name: extract_vector_elt_v3s16_idx2_i32
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(s32) = G_CONSTANT i32 2
%2:_(<3 x s16>) = G_TRUNC %0
@@ -1164,8 +1171,9 @@ body: |
; CHECK-LABEL: name: extract_vector_elt_0_v2i64
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<2 x s64>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[UV]](s64)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY1]](s64)
%0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = G_CONSTANT i32 0
%2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -1181,8 +1189,9 @@ body: |
; CHECK-LABEL: name: extract_vector_elt_0_v8i64
; CHECK: [[DEF:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[DEF]](<8 x s64>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<8 x s64>)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UV]](s64)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64)
%0:_(<8 x s64>) = G_IMPLICIT_DEF
%1:_(s32) = G_CONSTANT i32 0
%2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -1198,8 +1207,9 @@ body: |
; CHECK-LABEL: name: extract_vector_elt_0_v16i64
; CHECK: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[DEF]](<16 x s64>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64), [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64), [[UV14:%[0-9]+]]:_(s64), [[UV15:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<16 x s64>)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UV]](s64)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64)
%0:_(<16 x s64>) = G_IMPLICIT_DEF
%1:_(s32) = G_CONSTANT i32 0
%2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -1215,8 +1225,9 @@ body: |
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-LABEL: name: extract_vector_elt_look_through_trunc_0_v4i32
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<4 x s32>), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s64) = G_CONSTANT i64 0
%2:_(s32) = G_TRUNC %1
@@ -1234,8 +1245,9 @@ body: |
; CHECK-LABEL: name: extract_vector_elt_7_v64s32
; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 4, addrspace 4)
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<16 x s32>), 224
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV7]](s32)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](s32)
%0:_(p1) = COPY $sgpr0_sgpr1
%1:_(s32) = G_CONSTANT i32 7
%2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4)
@@ -1255,8 +1267,9 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4)
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<16 x s32>), 32
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](s32)
%0:_(p1) = COPY $sgpr0_sgpr1
%1:_(s32) = G_CONSTANT i32 33
%2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4)
@@ -1294,218 +1307,13 @@ body: |
; CHECK-LABEL: name: extract_vector_elt_33_v64p3
; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 4, addrspace 4)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD]](<16 x s32>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 64, align 4, addrspace 4)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD1]](<16 x s32>)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD2]](<16 x s32>)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<16 x s32>) from unknown-address + 192, align 4, addrspace 4)
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD3]](<16 x s32>)
- ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD]](<16 x s32>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3), [[UV2:%[0-9]+]]:_(p3), [[UV3:%[0-9]+]]:_(p3), [[UV4:%[0-9]+]]:_(p3), [[UV5:%[0-9]+]]:_(p3), [[UV6:%[0-9]+]]:_(p3), [[UV7:%[0-9]+]]:_(p3), [[UV8:%[0-9]+]]:_(p3), [[UV9:%[0-9]+]]:_(p3), [[UV10:%[0-9]+]]:_(p3), [[UV11:%[0-9]+]]:_(p3), [[UV12:%[0-9]+]]:_(p3), [[UV13:%[0-9]+]]:_(p3), [[UV14:%[0-9]+]]:_(p3), [[UV15:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST]](<16 x p3>)
- ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(p3), [[UV17:%[0-9]+]]:_(p3), [[UV18:%[0-9]+]]:_(p3), [[UV19:%[0-9]+]]:_(p3), [[UV20:%[0-9]+]]:_(p3), [[UV21:%[0-9]+]]:_(p3), [[UV22:%[0-9]+]]:_(p3), [[UV23:%[0-9]+]]:_(p3), [[UV24:%[0-9]+]]:_(p3), [[UV25:%[0-9]+]]:_(p3), [[UV26:%[0-9]+]]:_(p3), [[UV27:%[0-9]+]]:_(p3), [[UV28:%[0-9]+]]:_(p3), [[UV29:%[0-9]+]]:_(p3), [[UV30:%[0-9]+]]:_(p3), [[UV31:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST1]](<16 x p3>)
- ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(p3), [[UV33:%[0-9]+]]:_(p3), [[UV34:%[0-9]+]]:_(p3), [[UV35:%[0-9]+]]:_(p3), [[UV36:%[0-9]+]]:_(p3), [[UV37:%[0-9]+]]:_(p3), [[UV38:%[0-9]+]]:_(p3), [[UV39:%[0-9]+]]:_(p3), [[UV40:%[0-9]+]]:_(p3), [[UV41:%[0-9]+]]:_(p3), [[UV42:%[0-9]+]]:_(p3), [[UV43:%[0-9]+]]:_(p3), [[UV44:%[0-9]+]]:_(p3), [[UV45:%[0-9]+]]:_(p3), [[UV46:%[0-9]+]]:_(p3), [[UV47:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST2]](<16 x p3>)
- ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(p3), [[UV49:%[0-9]+]]:_(p3), [[UV50:%[0-9]+]]:_(p3), [[UV51:%[0-9]+]]:_(p3), [[UV52:%[0-9]+]]:_(p3), [[UV53:%[0-9]+]]:_(p3), [[UV54:%[0-9]+]]:_(p3), [[UV55:%[0-9]+]]:_(p3), [[UV56:%[0-9]+]]:_(p3), [[UV57:%[0-9]+]]:_(p3), [[UV58:%[0-9]+]]:_(p3), [[UV59:%[0-9]+]]:_(p3), [[UV60:%[0-9]+]]:_(p3), [[UV61:%[0-9]+]]:_(p3), [[UV62:%[0-9]+]]:_(p3), [[UV63:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST3]](<16 x p3>)
- ; CHECK-NEXT: G_STORE [[UV]](p3), [[FRAME_INDEX]](p5) :: (store (p3) into %stack.0, align 256, addrspace 5)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32)
- ; CHECK-NEXT: G_STORE [[UV1]](p3), [[PTR_ADD3]](p5) :: (store (p3) into %stack.0 + 4, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32)
- ; CHECK-NEXT: G_STORE [[UV2]](p3), [[PTR_ADD4]](p5) :: (store (p3) into %stack.0 + 8, align 8, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32)
- ; CHECK-NEXT: G_STORE [[UV3]](p3), [[PTR_ADD5]](p5) :: (store (p3) into %stack.0 + 12, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32)
- ; CHECK-NEXT: G_STORE [[UV4]](p3), [[PTR_ADD6]](p5) :: (store (p3) into %stack.0 + 16, align 16, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32)
- ; CHECK-NEXT: G_STORE [[UV5]](p3), [[PTR_ADD7]](p5) :: (store (p3) into %stack.0 + 20, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32)
- ; CHECK-NEXT: G_STORE [[UV6]](p3), [[PTR_ADD8]](p5) :: (store (p3) into %stack.0 + 24, align 8, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
- ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32)
- ; CHECK-NEXT: G_STORE [[UV7]](p3), [[PTR_ADD9]](p5) :: (store (p3) into %stack.0 + 28, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32)
- ; CHECK-NEXT: G_STORE [[UV8]](p3), [[PTR_ADD10]](p5) :: (store (p3) into %stack.0 + 32, align 32, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 36
- ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32)
- ; CHECK-NEXT: G_STORE [[UV9]](p3), [[PTR_ADD11]](p5) :: (store (p3) into %stack.0 + 36, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
- ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32)
- ; CHECK-NEXT: G_STORE [[UV10]](p3), [[PTR_ADD12]](p5) :: (store (p3) into %stack.0 + 40, align 8, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 44
- ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32)
- ; CHECK-NEXT: G_STORE [[UV11]](p3), [[PTR_ADD13]](p5) :: (store (p3) into %stack.0 + 44, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
- ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32)
- ; CHECK-NEXT: G_STORE [[UV12]](p3), [[PTR_ADD14]](p5) :: (store (p3) into %stack.0 + 48, align 16, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 52
- ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32)
- ; CHECK-NEXT: G_STORE [[UV13]](p3), [[PTR_ADD15]](p5) :: (store (p3) into %stack.0 + 52, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 56
- ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32)
- ; CHECK-NEXT: G_STORE [[UV14]](p3), [[PTR_ADD16]](p5) :: (store (p3) into %stack.0 + 56, align 8, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 60
- ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32)
- ; CHECK-NEXT: G_STORE [[UV15]](p3), [[PTR_ADD17]](p5) :: (store (p3) into %stack.0 + 60, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
- ; CHECK-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32)
- ; CHECK-NEXT: G_STORE [[UV16]](p3), [[PTR_ADD18]](p5) :: (store (p3) into %stack.0 + 64, align 64, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 68
- ; CHECK-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32)
- ; CHECK-NEXT: G_STORE [[UV17]](p3), [[PTR_ADD19]](p5) :: (store (p3) into %stack.0 + 68, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 72
- ; CHECK-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](s32)
- ; CHECK-NEXT: G_STORE [[UV18]](p3), [[PTR_ADD20]](p5) :: (store (p3) into %stack.0 + 72, align 8, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 76
- ; CHECK-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32)
- ; CHECK-NEXT: G_STORE [[UV19]](p3), [[PTR_ADD21]](p5) :: (store (p3) into %stack.0 + 76, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 80
- ; CHECK-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32)
- ; CHECK-NEXT: G_STORE [[UV20]](p3), [[PTR_ADD22]](p5) :: (store (p3) into %stack.0 + 80, align 16, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 84
- ; CHECK-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](s32)
- ; CHECK-NEXT: G_STORE [[UV21]](p3), [[PTR_ADD23]](p5) :: (store (p3) into %stack.0 + 84, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 88
- ; CHECK-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32)
- ; CHECK-NEXT: G_STORE [[UV22]](p3), [[PTR_ADD24]](p5) :: (store (p3) into %stack.0 + 88, align 8, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 92
- ; CHECK-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32)
- ; CHECK-NEXT: G_STORE [[UV23]](p3), [[PTR_ADD25]](p5) :: (store (p3) into %stack.0 + 92, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 96
- ; CHECK-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32)
- ; CHECK-NEXT: G_STORE [[UV24]](p3), [[PTR_ADD26]](p5) :: (store (p3) into %stack.0 + 96, align 32, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
- ; CHECK-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32)
- ; CHECK-NEXT: G_STORE [[UV25]](p3), [[PTR_ADD27]](p5) :: (store (p3) into %stack.0 + 100, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 104
- ; CHECK-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32)
- ; CHECK-NEXT: G_STORE [[UV26]](p3), [[PTR_ADD28]](p5) :: (store (p3) into %stack.0 + 104, align 8, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 108
- ; CHECK-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](s32)
- ; CHECK-NEXT: G_STORE [[UV27]](p3), [[PTR_ADD29]](p5) :: (store (p3) into %stack.0 + 108, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 112
- ; CHECK-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32)
- ; CHECK-NEXT: G_STORE [[UV28]](p3), [[PTR_ADD30]](p5) :: (store (p3) into %stack.0 + 112, align 16, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 116
- ; CHECK-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32)
- ; CHECK-NEXT: G_STORE [[UV29]](p3), [[PTR_ADD31]](p5) :: (store (p3) into %stack.0 + 116, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 120
- ; CHECK-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32)
- ; CHECK-NEXT: G_STORE [[UV30]](p3), [[PTR_ADD32]](p5) :: (store (p3) into %stack.0 + 120, align 8, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 124
- ; CHECK-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32)
- ; CHECK-NEXT: G_STORE [[UV31]](p3), [[PTR_ADD33]](p5) :: (store (p3) into %stack.0 + 124, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
- ; CHECK-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32)
- ; CHECK-NEXT: G_STORE [[UV32]](p3), [[PTR_ADD34]](p5) :: (store (p3) into %stack.0 + 128, align 128, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 132
- ; CHECK-NEXT: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[PTR_ADD35]](p5)
- ; CHECK-NEXT: G_STORE [[UV33]](p3), [[COPY1]](p5) :: (store (p3) into %stack.0 + 132, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 136
- ; CHECK-NEXT: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32)
- ; CHECK-NEXT: G_STORE [[UV34]](p3), [[PTR_ADD36]](p5) :: (store (p3) into %stack.0 + 136, align 8, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 140
- ; CHECK-NEXT: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](s32)
- ; CHECK-NEXT: G_STORE [[UV35]](p3), [[PTR_ADD37]](p5) :: (store (p3) into %stack.0 + 140, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 144
- ; CHECK-NEXT: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32)
- ; CHECK-NEXT: G_STORE [[UV36]](p3), [[PTR_ADD38]](p5) :: (store (p3) into %stack.0 + 144, align 16, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 148
- ; CHECK-NEXT: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32)
- ; CHECK-NEXT: G_STORE [[UV37]](p3), [[PTR_ADD39]](p5) :: (store (p3) into %stack.0 + 148, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 152
- ; CHECK-NEXT: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32)
- ; CHECK-NEXT: G_STORE [[UV38]](p3), [[PTR_ADD40]](p5) :: (store (p3) into %stack.0 + 152, align 8, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 156
- ; CHECK-NEXT: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32)
- ; CHECK-NEXT: G_STORE [[UV39]](p3), [[PTR_ADD41]](p5) :: (store (p3) into %stack.0 + 156, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 160
- ; CHECK-NEXT: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32)
- ; CHECK-NEXT: G_STORE [[UV40]](p3), [[PTR_ADD42]](p5) :: (store (p3) into %stack.0 + 160, align 32, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 164
- ; CHECK-NEXT: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32)
- ; CHECK-NEXT: G_STORE [[UV41]](p3), [[PTR_ADD43]](p5) :: (store (p3) into %stack.0 + 164, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 168
- ; CHECK-NEXT: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32)
- ; CHECK-NEXT: G_STORE [[UV42]](p3), [[PTR_ADD44]](p5) :: (store (p3) into %stack.0 + 168, align 8, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 172
- ; CHECK-NEXT: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32)
- ; CHECK-NEXT: G_STORE [[UV43]](p3), [[PTR_ADD45]](p5) :: (store (p3) into %stack.0 + 172, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 176
- ; CHECK-NEXT: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](s32)
- ; CHECK-NEXT: G_STORE [[UV44]](p3), [[PTR_ADD46]](p5) :: (store (p3) into %stack.0 + 176, align 16, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 180
- ; CHECK-NEXT: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32)
- ; CHECK-NEXT: G_STORE [[UV45]](p3), [[PTR_ADD47]](p5) :: (store (p3) into %stack.0 + 180, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 184
- ; CHECK-NEXT: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32)
- ; CHECK-NEXT: G_STORE [[UV46]](p3), [[PTR_ADD48]](p5) :: (store (p3) into %stack.0 + 184, align 8, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 188
- ; CHECK-NEXT: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32)
- ; CHECK-NEXT: G_STORE [[UV47]](p3), [[PTR_ADD49]](p5) :: (store (p3) into %stack.0 + 188, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 192
- ; CHECK-NEXT: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](s32)
- ; CHECK-NEXT: G_STORE [[UV48]](p3), [[PTR_ADD50]](p5) :: (store (p3) into %stack.0 + 192, align 64, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 196
- ; CHECK-NEXT: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32)
- ; CHECK-NEXT: G_STORE [[UV49]](p3), [[PTR_ADD51]](p5) :: (store (p3) into %stack.0 + 196, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 200
- ; CHECK-NEXT: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32)
- ; CHECK-NEXT: G_STORE [[UV50]](p3), [[PTR_ADD52]](p5) :: (store (p3) into %stack.0 + 200, align 8, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 204
- ; CHECK-NEXT: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](s32)
- ; CHECK-NEXT: G_STORE [[UV51]](p3), [[PTR_ADD53]](p5) :: (store (p3) into %stack.0 + 204, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 208
- ; CHECK-NEXT: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32)
- ; CHECK-NEXT: G_STORE [[UV52]](p3), [[PTR_ADD54]](p5) :: (store (p3) into %stack.0 + 208, align 16, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 212
- ; CHECK-NEXT: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32)
- ; CHECK-NEXT: G_STORE [[UV53]](p3), [[PTR_ADD55]](p5) :: (store (p3) into %stack.0 + 212, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 216
- ; CHECK-NEXT: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32)
- ; CHECK-NEXT: G_STORE [[UV54]](p3), [[PTR_ADD56]](p5) :: (store (p3) into %stack.0 + 216, align 8, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 220
- ; CHECK-NEXT: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32)
- ; CHECK-NEXT: G_STORE [[UV55]](p3), [[PTR_ADD57]](p5) :: (store (p3) into %stack.0 + 220, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 224
- ; CHECK-NEXT: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32)
- ; CHECK-NEXT: G_STORE [[UV56]](p3), [[PTR_ADD58]](p5) :: (store (p3) into %stack.0 + 224, align 32, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 228
- ; CHECK-NEXT: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32)
- ; CHECK-NEXT: G_STORE [[UV57]](p3), [[PTR_ADD59]](p5) :: (store (p3) into %stack.0 + 228, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 232
- ; CHECK-NEXT: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32)
- ; CHECK-NEXT: G_STORE [[UV58]](p3), [[PTR_ADD60]](p5) :: (store (p3) into %stack.0 + 232, align 8, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 236
- ; CHECK-NEXT: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32)
- ; CHECK-NEXT: G_STORE [[UV59]](p3), [[PTR_ADD61]](p5) :: (store (p3) into %stack.0 + 236, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 240
- ; CHECK-NEXT: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32)
- ; CHECK-NEXT: G_STORE [[UV60]](p3), [[PTR_ADD62]](p5) :: (store (p3) into %stack.0 + 240, align 16, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 244
- ; CHECK-NEXT: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32)
- ; CHECK-NEXT: G_STORE [[UV61]](p3), [[PTR_ADD63]](p5) :: (store (p3) into %stack.0 + 244, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 248
- ; CHECK-NEXT: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32)
- ; CHECK-NEXT: G_STORE [[UV62]](p3), [[PTR_ADD64]](p5) :: (store (p3) into %stack.0 + 248, align 8, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 252
- ; CHECK-NEXT: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32)
- ; CHECK-NEXT: G_STORE [[UV63]](p3), [[PTR_ADD65]](p5) :: (store (p3) into %stack.0 + 252, basealign 256, addrspace 5)
- ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD35]](p5) :: (load (p3) from %stack.0 + 132, addrspace 5)
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[LOAD4]](p3)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY [[UV1]](p3)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](p3)
%0:_(p1) = COPY $sgpr0_sgpr1
%1:_(s32) = G_CONSTANT i32 33
%2:_(<64 x p3>) = G_LOAD %0 :: (load (<64 x p3>), align 4, addrspace 4)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
index e8e863a964c93..ba80462858f13 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
@@ -187,8 +187,9 @@ body: |
; CHECK-LABEL: name: test_extract_s32_v2s32_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(s32) = G_EXTRACT %0, 0
$vgpr0 = COPY %1
@@ -201,8 +202,9 @@ body: |
; CHECK-LABEL: name: test_extract_s32_v2s32_offset32
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(s32) = G_EXTRACT %0, 32
$vgpr0 = COPY %1
@@ -215,8 +217,9 @@ body: |
; CHECK-LABEL: name: test_extract_s32_v3s32_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(s32) = G_EXTRACT %0, 0
$vgpr0 = COPY %1
@@ -229,8 +232,9 @@ body: |
; CHECK-LABEL: name: test_extract_s32_v3s32_offset32
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 32
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(s32) = G_EXTRACT %0, 32
$vgpr0 = COPY %1
@@ -243,8 +247,9 @@ body: |
; CHECK-LABEL: name: test_extract_s32_v3s32_offset64
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(s32) = G_EXTRACT %0, 64
$vgpr0 = COPY %1
@@ -257,8 +262,9 @@ body: |
; CHECK-LABEL: name: test_extract_s32_v4s32_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<4 x s32>), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = G_EXTRACT %0, 0
$vgpr0 = COPY %1
@@ -313,8 +319,9 @@ body: |
; CHECK-LABEL: name: test_extract_v2s32_v4s32_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<4 x s32>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](<2 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(<2 x s32>) = G_EXTRACT %0, 0
$vgpr0_vgpr1 = COPY %1
@@ -328,8 +335,9 @@ body: |
; CHECK-LABEL: name: test_extract_v2s32_v4s32_offset32
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<4 x s32>), 32
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](<2 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV1]](s32), [[UV2]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(<2 x s32>) = G_EXTRACT %0, 32
$vgpr0_vgpr1 = COPY %1
@@ -343,8 +351,9 @@ body: |
; CHECK-LABEL: name: test_extract_v2s32_v4s32_offset64
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<4 x s32>), 64
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](<2 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(<2 x s32>) = G_EXTRACT %0, 64
$vgpr0_vgpr1 = COPY %1
@@ -357,8 +366,9 @@ body: |
; CHECK-LABEL: name: test_extract_s64_v4s32_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<4 x s32>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s64) = G_EXTRACT %0, 0
$vgpr0_vgpr1 = COPY %1
@@ -372,8 +382,9 @@ body: |
; CHECK-LABEL: name: test_extract_s64_v4s32_offset32
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<4 x s32>), 32
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV1]](s32), [[UV2]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s64) = G_EXTRACT %0, 32
$vgpr0_vgpr1 = COPY %1
@@ -387,8 +398,9 @@ body: |
; CHECK-LABEL: name: test_extract_s64_v4s32_offset64
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<4 x s32>), 64
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s64) = G_EXTRACT %0, 64
$vgpr0_vgpr1 = COPY %1
@@ -401,8 +413,9 @@ body: |
; CHECK-LABEL: name: test_extract_p0_v4s32_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(p0) = G_EXTRACT [[COPY]](<4 x s32>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](p0)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(p0) = G_EXTRACT %0, 0
$vgpr0_vgpr1 = COPY %1
@@ -416,8 +429,9 @@ body: |
; CHECK-LABEL: name: test_extract_p0_v4s32_offset32
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(p0) = G_EXTRACT [[COPY]](<4 x s32>), 32
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](p0)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[UV1]](s32), [[UV2]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(p0) = G_EXTRACT %0, 32
$vgpr0_vgpr1 = COPY %1
@@ -431,8 +445,9 @@ body: |
; CHECK-LABEL: name: test_extract_p0_v4s32_offset64
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(p0) = G_EXTRACT [[COPY]](<4 x s32>), 64
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](p0)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(p0) = G_EXTRACT %0, 64
$vgpr0_vgpr1 = COPY %1
@@ -444,13 +459,8 @@ body: |
bb.0:
; CHECK-LABEL: name: extract_s8_v4s8_offset0
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
- ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
%0:_(<4 x s8>) = G_IMPLICIT_DEF
%1:_(s8) = G_EXTRACT %0, 0
%2:_(s32) = G_ANYEXT %1
@@ -463,13 +473,8 @@ body: |
bb.0:
; CHECK-LABEL: name: extract_s8_v4s8_offset8
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 16
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
- ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV1]](s32)
%0:_(<4 x s8>) = G_IMPLICIT_DEF
%1:_(s8) = G_EXTRACT %0, 8
%2:_(s32) = G_ANYEXT %1
@@ -482,13 +487,8 @@ body: |
bb.0:
; CHECK-LABEL: name: extract_s8_v4s8_offset16
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 32
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
- ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32)
%0:_(<4 x s8>) = G_IMPLICIT_DEF
%1:_(s8) = G_EXTRACT %0, 16
%2:_(s32) = G_ANYEXT %1
@@ -501,13 +501,8 @@ body: |
bb.0:
; CHECK-LABEL: name: extract_s8_v4s8_offset24
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>)
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 48
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
- ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](s32)
%0:_(<4 x s8>) = G_IMPLICIT_DEF
%1:_(s8) = G_EXTRACT %0, 24
%2:_(s32) = G_ANYEXT %1
@@ -522,26 +517,7 @@ body: |
; CHECK-LABEL: name: extract_s8_v3s8_offset16
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF1]](<2 x s16>)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV4]](<3 x s16>), 0
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 32
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
- ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32)
%0:_(<3 x s8>) = G_IMPLICIT_DEF
%1:_(s8) = G_EXTRACT %0, 16
%2:_(s32) = G_ANYEXT %1
@@ -555,21 +531,7 @@ body: |
; CHECK-LABEL: name: extract_s8_v5s1_offset4
; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF1]](s32)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>)
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s32>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[DEF2]](<2 x s16>), [[DEF2]](<2 x s16>)
- ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF3]], [[UV5]](<5 x s16>), 0
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<6 x s16>), 64
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
- ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV4]](s32)
%0:_(<5 x s1>) = G_IMPLICIT_DEF
%1:_(s1) = G_EXTRACT %0, 4
%2:_(s32) = G_ANYEXT %1
@@ -582,8 +544,17 @@ body: |
bb.0:
; CHECK-LABEL: name: extract_v2s16_v4s16_offset32
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 32
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
%0:_(<4 x s16>) = G_IMPLICIT_DEF
%1:_(<2 x s16>) = G_EXTRACT %0, 32
$vgpr0 = COPY %1
@@ -595,8 +566,17 @@ body: |
bb.0:
; CHECK-LABEL: name: extract_v2s16_v6s16_offset32
; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<6 x s16>), 32
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
%0:_(<6 x s16>) = G_IMPLICIT_DEF
%1:_(<2 x s16>) = G_EXTRACT %0, 32
$vgpr0 = COPY %1
@@ -823,13 +803,9 @@ body: |
; CHECK-LABEL: name: extract_s16_v3s16_offset0
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
- ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(s16) = G_EXTRACT %0, 0
%2:_(s32) = G_ANYEXT %1
@@ -908,12 +884,17 @@ body: |
; CHECK-LABEL: name: extract_v2s16_v3s16_offset0
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<2 x s16>) = G_EXTRACT %0, 0
$vgpr0 = COPY %1
@@ -926,12 +907,17 @@ body: |
; CHECK-LABEL: name: extract_v2s16_v5s16_offset0
; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<6 x s16>), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
%0:_(<5 x s16>) = G_IMPLICIT_DEF
%1:_(<2 x s16>) = G_EXTRACT %0, 0
$vgpr0 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
index df4112042a138..16bb69c872ea0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
@@ -231,25 +231,19 @@ body: |
; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]]
- ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+ ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
- ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
- ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]]
- ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
- ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
- ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
+ ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
; SI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
; VI-LABEL: name: test_fabs_v3s16
; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -274,25 +268,19 @@ body: |
; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
- ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]]
- ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+ ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
- ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
- ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]]
- ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
- ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
- ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
+ ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
; VI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
; GFX9-LABEL: name: test_fabs_v3s16
; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -309,12 +297,9 @@ body: |
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[BITCAST4]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[BITCAST5]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[BITCAST2]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
index e500e59bff7d0..aaff85970beca 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
@@ -446,21 +446,21 @@ body: |
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32)
; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]]
; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]]
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FADD]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FADD1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
index 4e3a1a74a38c9..f230dbc1ae689 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
@@ -1382,7 +1382,6 @@ body: |
; SI-LABEL: name: test_fdiv_v3s16
; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
@@ -1391,6 +1390,7 @@ body: |
; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
@@ -1451,7 +1451,6 @@ body: |
; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
; VI-LABEL: name: test_fdiv_v3s16
; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
@@ -1460,6 +1459,7 @@ body: |
; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
@@ -1492,7 +1492,6 @@ body: |
; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
; GFX9-LABEL: name: test_fdiv_v3s16
; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
@@ -1501,6 +1500,7 @@ body: |
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
@@ -1533,7 +1533,6 @@ body: |
; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
; GFX9-UNSAFE-LABEL: name: test_fdiv_v3s16
; GFX9-UNSAFE: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
@@ -1542,6 +1541,7 @@ body: |
; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; GFX9-UNSAFE-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
@@ -1562,7 +1562,6 @@ body: |
; GFX9-UNSAFE-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
; GFX10-LABEL: name: test_fdiv_v3s16
; GFX10: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
@@ -1571,6 +1570,7 @@ body: |
; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
index bcb249055a247..fed4cfcc793a4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
@@ -518,27 +518,27 @@ body: |
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32)
; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>)
; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32)
; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]], [[BUILD_VECTOR_TRUNC4]]
; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC5]]
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FMA]](<2 x s16>)
; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FMA1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir
index 3edb265c710d6..acae7a10d5095 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir
@@ -1,8 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
---
name: test_fmaxnum_s32_ieee_mode_on
@@ -496,32 +496,38 @@ body: |
; GFX9-LABEL: name: test_fmaxnum_v3s16
; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
- ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
- ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV4]]
- ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV6]]
- ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
- ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV5]]
- ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV7]]
- ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>)
- ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC]]
+ ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC2]]
+ ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
+ ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC1]]
+ ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC3]]
+ ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST7]](s32)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
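
(Editor's sketch, names are mine and not GlobalISel's: the old fmaxnum sequence above went through <3 x s16> G_UNMERGE_VALUES and a partial G_INSERT into an implicit-def register; the new one uses only uniform <2 x s16> unmerges and G_BUILD_VECTOR_TRUNC. A plausible reading of why that matters is that an unmerge fed by a matching merge folds to the merge's sources in one step, while a partial insert has no such direct fold. Modeled minimally on lists:)

#include <cassert>
#include <vector>

struct Piece { int Id; }; // stands in for one <2 x s16> register

// unmerge(merge(Xs)) -> Xs: the classic artifact fold, modeled on a list.
std::vector<Piece> unmergeOfMerge(const std::vector<Piece> &MergeSrcs) {
  return MergeSrcs; // each unmerge result is the corresponding merge source
}

int main() {
  std::vector<Piece> Srcs = {{0}, {1}, {2}};   // three <2 x s16> pieces
  auto Folded = unmergeOfMerge(Srcs);          // no intermediate <6 x s16>
  assert(Folded.size() == 3 && Folded[1].Id == 1);
}
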
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir
index 097b096f82b71..7c8934a28edd6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir
@@ -1,8 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
---
name: test_fminnum_s32_ieee_mode_on
@@ -496,32 +496,38 @@ body: |
; GFX9-LABEL: name: test_fminnum_v3s16
; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
- ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
- ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV4]]
- ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV6]]
- ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
- ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV5]]
- ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV7]]
- ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE]](<2 x s16>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE1]](<2 x s16>)
- ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC]]
+ ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC2]]
+ ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
+ ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC1]]
+ ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC3]]
+ ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST7]](s32)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir
index 970c9f64565f3..5e86ea25e6742 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir
@@ -1,8 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9PLUS %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9PLUS %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9PLUS %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9PLUS %s
---
name: test_fmul_s32
@@ -433,21 +433,21 @@ body: |
; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
- ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32)
; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]]
; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]]
- ; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL]](<2 x s16>)
; GFX9PLUS-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; GFX9PLUS-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL1]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; GFX9PLUS-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
; GFX9PLUS-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir
index ddf5ec6db384b..f0d91b08d0077 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir
@@ -223,26 +223,9 @@ body: |
; CHECK-LABEL: name: test_freeze_s1056
; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s512)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32)
- ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV32]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1024) = G_FREEZE [[MV]]
- ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s1024) = G_FREEZE [[MV1]]
- ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s33792) = G_MERGE_VALUES [[FREEZE]](s1024), [[FREEZE1]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1056) = G_TRUNC [[MV2]](s33792)
- ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC]](s1056)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s1056) = G_ANYEXT [[COPY]](s512)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1056) = G_FREEZE [[ANYEXT]]
+ ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](s1056)
%0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
%1:_(s1056) = G_ANYEXT %0
%2:_(s1056) = G_FREEZE %1
@@ -417,19 +400,18 @@ body: |
; CHECK-LABEL: name: test_freeze_v33s32
; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
- ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32)
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV32]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32)
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR]]
; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR2]]
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<528 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<16 x s32>), [[FREEZE1]](<16 x s32>), [[FREEZE2]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>)
- ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(<33 x s32>), [[UV49:%[0-9]+]]:_(<33 x s32>), [[UV50:%[0-9]+]]:_(<33 x s32>), [[UV51:%[0-9]+]]:_(<33 x s32>), [[UV52:%[0-9]+]]:_(<33 x s32>), [[UV53:%[0-9]+]]:_(<33 x s32>), [[UV54:%[0-9]+]]:_(<33 x s32>), [[UV55:%[0-9]+]]:_(<33 x s32>), [[UV56:%[0-9]+]]:_(<33 x s32>), [[UV57:%[0-9]+]]:_(<33 x s32>), [[UV58:%[0-9]+]]:_(<33 x s32>), [[UV59:%[0-9]+]]:_(<33 x s32>), [[UV60:%[0-9]+]]:_(<33 x s32>), [[UV61:%[0-9]+]]:_(<33 x s32>), [[UV62:%[0-9]+]]:_(<33 x s32>), [[UV63:%[0-9]+]]:_(<33 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<528 x s32>)
- ; CHECK-NEXT: S_NOP 0, implicit [[UV48]](<33 x s32>)
+ ; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(s32) = G_FREEZE [[DEF1]]
+ ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<16 x s32>)
+ ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE1]](<16 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[UV32]](s32), [[UV33]](s32), [[UV34]](s32), [[UV35]](s32), [[UV36]](s32), [[UV37]](s32), [[UV38]](s32), [[UV39]](s32), [[UV40]](s32), [[UV41]](s32), [[UV42]](s32), [[UV43]](s32), [[UV44]](s32), [[UV45]](s32), [[UV46]](s32), [[UV47]](s32), [[UV48]](s32), [[UV49]](s32), [[UV50]](s32), [[UV51]](s32), [[UV52]](s32), [[UV53]](s32), [[UV54]](s32), [[UV55]](s32), [[UV56]](s32), [[UV57]](s32), [[UV58]](s32), [[UV59]](s32), [[UV60]](s32), [[UV61]](s32), [[UV62]](s32), [[UV63]](s32), [[FREEZE2]](s32)
+ ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<33 x s32>)
%0:_(<33 x s32>) = G_IMPLICIT_DEF
%1:_(<33 x s32>) = G_FREEZE %0
S_NOP 0, implicit %1
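
(Editor's sketch of the split visible in this hunk, under the assumption that the helper names below are hypothetical: the <33 x s32> G_FREEZE is now split as 16 + 16 + 1, where the final scalar is a leftover piece rather than a third, mostly-undef <16 x s32> register, and the results are re-merged by a single 33-lane G_BUILD_VECTOR.)

#include <algorithm>
#include <cstdio>
#include <vector>

template <typename Op>
std::vector<int> splitWithLeftover(const std::vector<int> &V, size_t Piece,
                                   Op PerElt) {
  std::vector<int> Out;
  for (size_t I = 0; I < V.size(); I += Piece) {
    size_t End = std::min(V.size(), I + Piece); // last chunk may be smaller
    for (size_t J = I; J < End; ++J)
      Out.push_back(PerElt(V[J]));
  }
  return Out; // re-merged into the original 33-element "vector"
}

int main() {
  std::vector<int> V(33, 7);
  auto R = splitWithLeftover(V, 16, [](int X) { return X; }); // freeze = id
  std::printf("%zu pieces of 16, leftover %zu\n", V.size() / 16,
              V.size() % 16); // 2 pieces of 16, leftover 1
  return R.size() == 33 ? 0 : 1;
}
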
@@ -530,16 +512,13 @@ body: |
; CHECK-LABEL: name: test_freeze_v3s8
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY]](<3 x s32>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s8>), 0
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[INSERT]](<4 x s8>)
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[ANYEXT]]
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[FREEZE]](<4 x s32>)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[TRUNC1]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s8>), [[UV1:%[0-9]+]]:_(<3 x s8>), [[UV2:%[0-9]+]]:_(<3 x s8>), [[UV3:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>)
- ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV]](<3 x s8>)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT1]](<3 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[BUILD_VECTOR]]
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<4 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR1]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s8>) = G_TRUNC %0
%2:_(<3 x s8>) = G_FREEZE %1
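
(Editor's hedged model of the pad/shrink pair in the v3s8 hunk above; the helper names are mine, not the MachineIRBuilder methods. Three lanes are widened to four by appending one implicit-def lane, the operation runs on the power-of-two type, and only the first three result lanes are kept.)

#include <cassert>
#include <optional>
#include <vector>

using Lane = std::optional<int>;

std::vector<Lane> padToFour(std::vector<Lane> V) {
  V.push_back(std::nullopt); // the G_IMPLICIT_DEF fourth lane
  return V;
}

std::vector<Lane> dropTrailing(std::vector<Lane> V, size_t N) {
  V.resize(N); // discard the padding lane after the operation
  return V;
}

int main() {
  std::vector<Lane> V = {1, 2, 3};
  auto Frozen = padToFour(V);        // <4 x ...> with lane 3 undef
  auto R = dropTrailing(Frozen, 3);  // back to the requested 3 lanes
  assert(R.size() == 3 && *R[2] == 3);
}
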
@@ -569,24 +548,26 @@ body: |
; CHECK-LABEL: name: test_freeze_v3s16
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>)
- ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV3]](<3 x s16>), 0
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[INSERT]]
- ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[CONCAT_VECTORS]]
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[BITCAST3]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s16>) = G_TRUNC %0
%2:_(<3 x s16>) = G_FREEZE %1
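
(Editor's worked example of the repacking arithmetic in the v3s16 hunk above, written as scalar C++: two 16-bit lanes become one s32 as (lo & 0xffff) | ((hi & 0xffff) << 16), which is then bitcast to <2 x s16>. The odd lane is paired with the constant 0 in the checks, hence the G_SHL of a zero constant.)

#include <cassert>
#include <cstdint>

uint32_t packLanes(uint32_t Lo, uint32_t Hi) {
  return (Lo & 0xffffu) | ((Hi & 0xffffu) << 16); // G_AND / G_SHL / G_OR
}

int main() {
  uint32_t Packed = packLanes(0x1234, 0xabcd);
  assert(Packed == 0xabcd1234u);
  // Unpacking mirrors the G_LSHR by 16 used to read the high lane back out.
  assert((Packed >> 16) == 0xabcdu && (Packed & 0xffffu) == 0x1234u);
}
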
@@ -616,28 +597,33 @@ body: |
; CHECK-LABEL: name: test_freeze_v5s16
; CHECK: [[COPY:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<5 x s32>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF]](s32)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>)
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s32>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[DEF1]](<2 x s16>), [[DEF1]](<2 x s16>)
- ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV5]](<5 x s16>), 0
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<6 x s16>) = G_FREEZE [[INSERT]]
- ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<6 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32), [[LSHR1]](s32), [[BITCAST2]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[BUILD_VECTOR3]](<5 x s32>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]]
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<6 x s16>) = G_FREEZE [[CONCAT_VECTORS]]
+ ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32)
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[BITCAST3]](s32), [[LSHR]](s32), [[BITCAST4]](s32), [[LSHR1]](s32), [[BITCAST5]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[BUILD_VECTOR]](<5 x s32>)
%0:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
%1:_(<5 x s16>) = G_TRUNC %0
%2:_(<5 x s16>) = G_FREEZE %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir
index d3d2e0294f7c1..ed51693e83a35 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir
@@ -798,19 +798,19 @@ body: |
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[COPY6]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[COPY7]](s32)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
@@ -845,17 +845,20 @@ body: |
; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC11]](<2 x s16>)
; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR5]], [[AND3]](<2 x s16>)
; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR6]]
+ ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[BITCAST7]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR7]](s32), [[BITCAST8]](s32)
- ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>)
- ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>)
- ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR7]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[BITCAST8]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR8]](s32), [[BITCAST9]](s32)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>)
+ ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
+ ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
index e791b23c839f0..c2d2557cae8fc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
@@ -674,8 +674,7 @@ body: |
; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C]](s32)
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
@@ -701,22 +700,22 @@ body: |
; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND7]](s16)
; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32)
; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
- ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
+ ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY7]](s32)
+ ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY6]](s32)
; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16)
; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]]
; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[ZEXT3]](s32)
; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32)
; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]]
+ ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
+ ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY7]](s32)
; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
- ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY8]](s32)
- ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
- ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY9]](s32)
+ ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY8]](s32)
+ ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C]](s32)
- ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY10]], [[SHL6]]
+ ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C]](s32)
+ ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL6]]
; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST6]], [[BITCAST8]]
; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>)
@@ -730,9 +729,9 @@ body: |
; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32)
; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32)
- ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
+ ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[SHL4]], [[C1]]
- ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY12]](s32)
+ ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY11]](s32)
; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16)
; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C1]]
; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[ZEXT5]](s32)
@@ -745,116 +744,76 @@ body: |
; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16)
; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32)
; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32)
- ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
+ ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[SHL5]], [[C1]]
- ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND17]], [[COPY13]](s32)
+ ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND17]], [[COPY12]](s32)
; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16)
; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C1]]
; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[ZEXT7]](s32)
; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32)
; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]]
- ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
- ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
- ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32)
- ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL9]]
- ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
; SI-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]]
; SI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]]
; SI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C4]]
- ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND19]](s16)
- ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT10]](s32)
- ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32)
- ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
+ ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[AND19]](s16)
+ ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT8]](s32)
+ ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32)
+ ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
; SI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND21]], [[COPY14]](s32)
- ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16)
+ ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND21]], [[COPY13]](s32)
+ ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16)
; SI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C1]]
- ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[ZEXT11]](s32)
+ ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[ZEXT9]](s32)
; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32)
- ; SI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]]
- ; SI-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]]
- ; SI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]]
- ; SI-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C4]]
- ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[AND23]](s16)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[DEF]], [[ZEXT12]](s32)
- ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32)
- ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
- ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[C2]], [[COPY15]](s32)
- ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[AND24]](s16)
- ; SI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C1]]
- ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND25]], [[ZEXT13]](s32)
- ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32)
- ; SI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[TRUNC12]], [[TRUNC13]]
- ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
- ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[COPY16]](s32)
+ ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]]
+ ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
+ ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[COPY14]](s32)
+ ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C]](s32)
+ ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL11]]
+ ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+ ; SI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST10]]
+ ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>)
+ ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST11]](s32)
+ ; SI-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C4]]
+ ; SI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC12]], [[C5]]
+ ; SI-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C4]]
+ ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND23]](s16)
+ ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR7]](s16)
+ ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT10]](s32)
+ ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32)
; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
- ; SI-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
- ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[COPY17]](s32)
- ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C]](s32)
- ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY19]], [[SHL14]]
- ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
- ; SI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST11]]
- ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>)
- ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST12]](s32)
+ ; SI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[SHL10]], [[C1]]
+ ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND25]], [[COPY17]](s32)
+ ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND24]](s16)
+ ; SI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C1]]
+ ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND26]], [[ZEXT11]](s32)
+ ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32)
+ ; SI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[TRUNC13]], [[TRUNC14]]
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C]](s32)
- ; SI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32)
- ; SI-NEXT: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C4]]
- ; SI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C5]]
- ; SI-NEXT: [[AND27:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C4]]
- ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[AND26]](s16)
- ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR8]](s16)
- ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT14]](s32)
- ; SI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32)
- ; SI-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
- ; SI-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[SHL12]], [[C1]]
- ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND28]], [[COPY21]](s32)
- ; SI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[AND27]](s16)
- ; SI-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C1]]
- ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[AND29]], [[ZEXT15]](s32)
- ; SI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32)
- ; SI-NEXT: [[OR11:%[0-9]+]]:_(s16) = G_OR [[TRUNC16]], [[TRUNC17]]
- ; SI-NEXT: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C4]]
- ; SI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C5]]
- ; SI-NEXT: [[AND31:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C4]]
- ; SI-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[AND30]](s16)
- ; SI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR9]](s16)
- ; SI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT3]], [[ZEXT16]](s32)
- ; SI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[SHL16]](s32)
- ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
- ; SI-NEXT: [[AND32:%[0-9]+]]:_(s32) = G_AND [[SHL13]], [[C1]]
- ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND32]], [[COPY22]](s32)
- ; SI-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[AND31]](s16)
- ; SI-NEXT: [[AND33:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C1]]
- ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND33]], [[ZEXT17]](s32)
- ; SI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR20]](s32)
- ; SI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[TRUNC18]], [[TRUNC19]]
- ; SI-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16)
- ; SI-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
- ; SI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT19]], [[C]](s32)
- ; SI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT18]], [[SHL17]]
- ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
- ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>)
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32)
- ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[AND34:%[0-9]+]]:_(s32) = G_AND [[BITCAST14]], [[C1]]
- ; SI-NEXT: [[AND35:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]]
- ; SI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND35]], [[C]](s32)
- ; SI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND34]], [[SHL18]]
- ; SI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
- ; SI-NEXT: [[AND36:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]]
- ; SI-NEXT: [[AND37:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]]
- ; SI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND37]], [[C]](s32)
- ; SI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND36]], [[SHL19]]
- ; SI-NEXT: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32)
- ; SI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>)
- ; SI-NEXT: $vgpr1 = COPY [[BITCAST17]](<2 x s16>)
- ; SI-NEXT: $vgpr2 = COPY [[BITCAST18]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
+ ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+ ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C]](s32)
+ ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL13]]
+ ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+ ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+ ; SI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[BITCAST12]], [[C1]]
+ ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C]](s32)
+ ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL14]]
+ ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+ ; SI-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C1]]
+ ; SI-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[BITCAST13]], [[C1]]
+ ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32)
+ ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL15]]
+ ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32)
+ ; SI-NEXT: $vgpr0 = COPY [[BITCAST14]](<2 x s16>)
+ ; SI-NEXT: $vgpr1 = COPY [[BITCAST15]](<2 x s16>)
+ ; SI-NEXT: $vgpr2 = COPY [[BITCAST16]](<2 x s16>)
; VI-LABEL: name: test_fshr_v3s16_v3s16
; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -869,7 +828,6 @@ body: |
; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -932,75 +890,52 @@ body: |
; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL5]], [[C3]](s16)
; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND10]](s16)
; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[SHL8]], [[LSHR11]]
- ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
- ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
- ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
- ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL9]]
- ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]]
; VI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]]
; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C4]]
- ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND11]](s16)
+ ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND11]](s16)
; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16)
; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[LSHR12]], [[AND12]](s16)
- ; VI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[SHL10]], [[LSHR13]]
- ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]]
- ; VI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]]
- ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C4]]
- ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[DEF]], [[AND13]](s16)
- ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[DEF]], [[C3]](s16)
- ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[LSHR14]], [[AND14]](s16)
- ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[SHL11]], [[LSHR15]]
- ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C3]](s16)
- ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[DEF]], [[C3]](s16)
+ ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[SHL9]], [[LSHR13]]
+ ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C3]](s16)
; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32)
- ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[SHL14]]
- ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
- ; VI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST11]]
- ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>)
- ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST12]](s32)
+ ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32)
+ ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[SHL11]]
+ ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+ ; VI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST10]]
+ ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>)
+ ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST11]](s32)
+ ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C4]]
+ ; VI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C5]]
+ ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C4]]
+ ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[OR7]], [[AND13]](s16)
+ ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[SHL10]], [[C3]](s16)
+ ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[LSHR14]], [[AND14]](s16)
+ ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[SHL12]], [[LSHR15]]
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C]](s32)
- ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32)
- ; VI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C4]]
- ; VI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C5]]
- ; VI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C4]]
- ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[OR8]], [[AND15]](s16)
- ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[SHL12]], [[C3]](s16)
- ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s16) = G_LSHR [[LSHR17]], [[AND16]](s16)
- ; VI-NEXT: [[OR11:%[0-9]+]]:_(s16) = G_OR [[SHL15]], [[LSHR18]]
- ; VI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C4]]
- ; VI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC9]], [[C5]]
- ; VI-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C4]]
- ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s16) = G_SHL [[OR9]], [[AND17]](s16)
- ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s16) = G_LSHR [[SHL13]], [[C3]](s16)
- ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s16) = G_LSHR [[LSHR19]], [[AND18]](s16)
- ; VI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[SHL16]], [[LSHR20]]
- ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16)
- ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16)
- ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32)
- ; VI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL17]]
- ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>)
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32)
- ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[BITCAST14]], [[C1]]
- ; VI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]]
- ; VI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C]](s32)
- ; VI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL18]]
- ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
- ; VI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]]
- ; VI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]]
- ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C]](s32)
- ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL19]]
- ; VI-NEXT: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32)
- ; VI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>)
- ; VI-NEXT: $vgpr1 = COPY [[BITCAST17]](<2 x s16>)
- ; VI-NEXT: $vgpr2 = COPY [[BITCAST18]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
+ ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+ ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+ ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL13]]
+ ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+ ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
+ ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[BITCAST12]], [[C1]]
+ ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32)
+ ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL14]]
+ ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+ ; VI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C1]]
+ ; VI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[BITCAST13]], [[C1]]
+ ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
+ ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL15]]
+ ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32)
+ ; VI-NEXT: $vgpr0 = COPY [[BITCAST14]](<2 x s16>)
+ ; VI-NEXT: $vgpr1 = COPY [[BITCAST15]](<2 x s16>)
+ ; VI-NEXT: $vgpr2 = COPY [[BITCAST16]](<2 x s16>)
; GFX9-LABEL: name: test_fshr_v3s16_v3s16
; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1012,19 +947,19 @@ body: |
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[COPY6]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[COPY7]](s32)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
@@ -1059,17 +994,20 @@ body: |
; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL2]], [[AND3]](<2 x s16>)
; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC3]], [[AND2]](<2 x s16>)
; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL3]], [[LSHR4]]
+ ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[BITCAST7]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[BITCAST8]](s32)
- ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>)
- ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>)
- ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR5]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[BITCAST8]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR6]](s32), [[BITCAST9]](s32)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>)
+ ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
+ ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
index 00eed0e2a64cb..7d6db970ad6ba 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
@@ -327,11 +327,11 @@ body: |
; CHECK-LABEL: name: test_implicit_def_v33s32
; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
- ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CHECK-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1)
- ; CHECK-NEXT: G_STORE [[UV16]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF1]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1)
%0:_(<33 x s32>) = G_IMPLICIT_DEF
%1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32), %11:_(s32), %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32), %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32), %23:_(s32), %24:_(s32), %25:_(s32), %26:_(s32), %27:_(s32), %28:_(s32), %29:_(s32), %30:_(s32), %31:_(s32), %32:_(s32), %33:_(s32) = G_UNMERGE_VALUES %0
%34:_(p1) = COPY $vgpr0_vgpr1
@@ -428,11 +428,28 @@ body: |
; CHECK-LABEL: name: test_implicit_def_v3s16
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<4 x s16>) = G_IMPLICIT_DEF
%2:_(<4 x s16>) = G_INSERT %1, %0, 0
@@ -458,12 +475,42 @@ body: |
; CHECK-LABEL: name: test_implicit_def_v5s16
; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF2]], [[UV]](<5 x s16>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>)
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+ ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>)
%0:_(<5 x s16>) = G_IMPLICIT_DEF
%1:_(<8 x s16>) = G_IMPLICIT_DEF
%2:_(<8 x s16>) = G_INSERT %1, %0, 0
@@ -478,8 +525,40 @@ body: |
; CHECK-LABEL: name: test_implicit_def_v6s16
; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF1]], [[DEF]](<6 x s16>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+ ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>)
%0:_(<6 x s16>) = G_IMPLICIT_DEF
%1:_(<8 x s16>) = G_IMPLICIT_DEF
%2:_(<8 x s16>) = G_INSERT %1, %0, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
index f5b89b7e9de9c..1f037cc5775a7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
@@ -10,8 +10,9 @@ body: |
; CHECK-LABEL: name: insert_vector_elt_0_v2s32
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(s32) = G_CONSTANT i32 0
@@ -28,8 +29,9 @@ body: |
; CHECK-LABEL: name: insert_vector_elt_1_v2s32
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(s32) = G_CONSTANT i32 1
@@ -107,8 +109,9 @@ body: |
; CHECK-LABEL: name: insert_vector_elt_0_v16s64
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<16 x s64>) = G_INSERT [[DEF]], [[COPY]](s64), 0
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT]](<16 x s64>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64), [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64), [[UV14:%[0-9]+]]:_(s64), [[UV15:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<16 x s64>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[UV7]](s64), [[UV8]](s64), [[UV9]](s64), [[UV10]](s64), [[UV11]](s64), [[UV12]](s64), [[UV13]](s64), [[UV14]](s64), [[UV15]](s64)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s64>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(<16 x s64>) = G_IMPLICIT_DEF
%2:_(s32) = G_CONSTANT i32 0
@@ -126,8 +129,9 @@ body: |
; CHECK-LABEL: name: insert_vector_elt_0_v2s32_s8
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(s8) = G_CONSTANT i8 0
@@ -145,8 +149,9 @@ body: |
; CHECK-LABEL: name: insert_vector_elt_0_v2i8_i32
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[DEF]], [[COPY]](s32), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s8) = G_TRUNC %0
%2:_(<2 x s8>) = G_IMPLICIT_DEF
@@ -166,8 +171,9 @@ body: |
; CHECK-LABEL: name: insert_vector_elt_v4s32_s32_look_through_trunc_0
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = COPY $vgpr4
%2:_(s64) = G_CONSTANT i64 0
@@ -305,55 +311,62 @@ body: |
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<16 x s32>) from unknown-address + 192, align 4, addrspace 4)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12345
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[LOAD2]](<16 x s32>), [[LOAD3]](<16 x s32>)
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<32 x s32>) = G_INSERT [[CONCAT_VECTORS]], [[C3]](s32), 32
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>)
+ ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[C3]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>)
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>), [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[INSERT]](<32 x s32>)
- ; CHECK-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(<4 x s32>), [[UV33:%[0-9]+]]:_(<4 x s32>), [[UV34:%[0-9]+]]:_(<4 x s32>), [[UV35:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>)
+ ; CHECK-NEXT: [[UV36:%[0-9]+]]:_(<4 x s32>), [[UV37:%[0-9]+]]:_(<4 x s32>), [[UV38:%[0-9]+]]:_(<4 x s32>), [[UV39:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>)
+ ; CHECK-NEXT: G_STORE [[UV32]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C4]](s64)
- ; CHECK-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[UV33]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C5]](s64)
- ; CHECK-NEXT: G_STORE [[UV2]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[UV34]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C6]](s64)
- ; CHECK-NEXT: G_STORE [[UV3]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[UV35]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1)
; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64)
- ; CHECK-NEXT: G_STORE [[UV4]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[UV36]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1)
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C7]](s64)
- ; CHECK-NEXT: G_STORE [[UV5]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[UV37]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1)
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 96
; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C8]](s64)
- ; CHECK-NEXT: G_STORE [[UV6]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[UV38]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1)
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 112
; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C9]](s64)
- ; CHECK-NEXT: G_STORE [[UV7]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[UV39]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1)
; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64)
- ; CHECK-NEXT: G_STORE [[UV8]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1)
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 144
; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C10]](s64)
- ; CHECK-NEXT: G_STORE [[UV9]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1)
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 160
; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C11]](s64)
- ; CHECK-NEXT: G_STORE [[UV10]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1)
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 176
; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C12]](s64)
- ; CHECK-NEXT: G_STORE [[UV11]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR3]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1)
; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64)
- ; CHECK-NEXT: G_STORE [[UV12]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR4]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1)
; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 208
; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C13]](s64)
- ; CHECK-NEXT: G_STORE [[UV13]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR5]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1)
; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 224
; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C14]](s64)
- ; CHECK-NEXT: G_STORE [[UV14]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR6]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1)
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 240
; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C15]](s64)
- ; CHECK-NEXT: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR7]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1)
%0:_(p1) = COPY $sgpr0_sgpr1
%1:_(s32) = G_CONSTANT i32 33
%2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir
index 1a8b37cf7d4ca..b6e4c91dec085 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir
@@ -409,8 +409,9 @@ body: |
; CHECK-LABEL: name: test_insert_v2s32_s32_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(<2 x s32>) = G_INSERT %0, %1, 0
@@ -425,8 +426,9 @@ body: |
; CHECK-LABEL: name: test_insert_v2s32_s32_offset32
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(<2 x s32>) = G_INSERT %0, %1, 32
@@ -441,8 +443,9 @@ body: |
; CHECK-LABEL: name: test_insert_v3s32_s32_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](<3 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32), [[UV2]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(s32) = COPY $vgpr3
%2:_(<3 x s32>) = G_INSERT %0, %1, 0
@@ -457,8 +460,9 @@ body: |
; CHECK-LABEL: name: test_insert_v3s32_s32_offset32
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](<3 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32), [[UV2]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(s32) = COPY $vgpr3
%2:_(<3 x s32>) = G_INSERT %0, %1, 32
@@ -473,8 +477,9 @@ body: |
; CHECK-LABEL: name: test_insert_v3s32_s32_offset64
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 64
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](<3 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(s32) = COPY $vgpr3
%2:_(<3 x s32>) = G_INSERT %0, %1, 64
@@ -489,8 +494,9 @@ body: |
; CHECK-LABEL: name: test_insert_v4s32_s32_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = COPY $vgpr4
%2:_(<4 x s32>) = G_INSERT %0, %1, 0
@@ -505,8 +511,9 @@ body: |
; CHECK-LABEL: name: test_insert_v4s32_s32_offset32
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = COPY $vgpr4
%2:_(<4 x s32>) = G_INSERT %0, %1, 32
@@ -521,8 +528,9 @@ body: |
; CHECK-LABEL: name: test_insert_v4s32_s32_offset64
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 64
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[COPY1]](s32), [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = COPY $vgpr4
%2:_(<4 x s32>) = G_INSERT %0, %1, 64
@@ -537,8 +545,9 @@ body: |
; CHECK-LABEL: name: test_insert_v4s32_s32_offset96
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 96
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = COPY $vgpr4
%2:_(<4 x s32>) = G_INSERT %0, %1, 96
@@ -553,8 +562,10 @@ body: |
; CHECK-LABEL: name: test_insert_v4s32_s64_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s64), 0
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s64) = COPY $vgpr4_vgpr5
%2:_(<4 x s32>) = G_INSERT %0, %1, 0
@@ -569,8 +580,10 @@ body: |
; CHECK-LABEL: name: test_insert_v4s32_s64_offset32
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s64), 32
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s64) = COPY $vgpr4_vgpr5
%2:_(<4 x s32>) = G_INSERT %0, %1, 32
@@ -585,8 +598,10 @@ body: |
; CHECK-LABEL: name: test_insert_v4s32_s64_offset64
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s64), 64
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV4]](s32), [[UV5]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s64) = COPY $vgpr4_vgpr5
%2:_(<4 x s32>) = G_INSERT %0, %1, 64
@@ -601,8 +616,10 @@ body: |
; CHECK-LABEL: name: test_insert_v4s32_s96_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s96), 0
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s96) = COPY $vgpr4_vgpr5_vgpr6
%2:_(<4 x s32>) = G_INSERT %0, %1, 0
@@ -617,8 +634,10 @@ body: |
; CHECK-LABEL: name: test_insert_v4s32_s96_offset32
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s96), 32
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s96) = COPY $vgpr4_vgpr5_vgpr6
%2:_(<4 x s32>) = G_INSERT %0, %1, 32
@@ -633,8 +652,10 @@ body: |
; CHECK-LABEL: name: test_insert_v4s32_v2s32_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<2 x s32>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(<2 x s32>) = COPY $vgpr4_vgpr5
%2:_(<4 x s32>) = G_INSERT %0, %1, 0
@@ -649,8 +670,10 @@ body: |
; CHECK-LABEL: name: test_insert_v4s32_v2s32_offset32
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<2 x s32>), 32
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(<2 x s32>) = COPY $vgpr4_vgpr5
%2:_(<4 x s32>) = G_INSERT %0, %1, 32
@@ -665,8 +688,10 @@ body: |
; CHECK-LABEL: name: test_insert_v4s32_v2s32_offset64
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<2 x s32>), 64
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV4]](s32), [[UV5]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(<2 x s32>) = COPY $vgpr4_vgpr5
%2:_(<4 x s32>) = G_INSERT %0, %1, 64
@@ -681,8 +706,10 @@ body: |
; CHECK-LABEL: name: test_insert_v4s32_v3s32_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<3 x s32>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6
%2:_(<4 x s32>) = G_INSERT %0, %1, 0
@@ -697,8 +724,10 @@ body: |
; CHECK-LABEL: name: test_insert_v4s32_v3s32_offset32
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<3 x s32>), 32
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6
%2:_(<4 x s32>) = G_INSERT %0, %1, 32
@@ -763,11 +792,13 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65536
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[AND]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
@@ -813,11 +844,11 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]]
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
@@ -834,16 +865,29 @@ body: |
; CHECK-LABEL: name: test_insert_v3s16_s16_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 0
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
- ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_EXTRACT %0, 0
%2:_(s32) = COPY $vgpr2
@@ -861,16 +905,28 @@ body: |
; CHECK-LABEL: name: test_insert_v3s16_s16_offset16
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 16
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
- ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_EXTRACT %0, 0
%2:_(s32) = COPY $vgpr2
@@ -888,16 +944,28 @@ body: |
; CHECK-LABEL: name: test_insert_v3s16_s16_offset32
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 32
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
- ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_EXTRACT %0, 0
%2:_(s32) = COPY $vgpr2
@@ -915,15 +983,29 @@ body: |
; CHECK-LABEL: name: test_insert_v3s16_v2s16_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](<2 x s16>), 0
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
- ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_EXTRACT %0, 0
%2:_(<2 x s16>) = COPY $vgpr2
@@ -940,15 +1022,29 @@ body: |
; CHECK-LABEL: name: test_insert_v3s16_v2s16_offset16
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](<2 x s16>), 16
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
- ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_EXTRACT %0, 0
%2:_(<2 x s16>) = COPY $vgpr2
@@ -965,15 +1061,28 @@ body: |
; CHECK-LABEL: name: test_insert_v3s16_s32_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](s32), 0
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
- ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_EXTRACT %0, 0
%2:_(s32) = COPY $vgpr2
@@ -990,15 +1099,28 @@ body: |
; CHECK-LABEL: name: test_insert_v3s16_s32_offset16
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](s32), 16
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
- ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_EXTRACT %0, 0
%2:_(s32) = COPY $vgpr2
@@ -1016,9 +1138,25 @@ body: |
; CHECK-LABEL: name: test_insert_v4s16_s16_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(s16) = G_TRUNC %1
@@ -1034,9 +1172,24 @@ body: |
; CHECK-LABEL: name: test_insert_v4s16_s16_offset16
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 16
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(s16) = G_TRUNC %1
@@ -1052,9 +1205,25 @@ body: |
; CHECK-LABEL: name: test_insert_v4s16_s16_offset32
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 32
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(s16) = G_TRUNC %1
@@ -1070,9 +1239,24 @@ body: |
; CHECK-LABEL: name: test_insert_v4s16_s16_offset48
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 48
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(s16) = G_TRUNC %1
@@ -1088,8 +1272,25 @@ body: |
; CHECK-LABEL: name: test_insert_v4s16_v2s16_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](<2 x s16>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(<4 x s16>) = G_INSERT %0, %1, 0
@@ -1104,8 +1305,26 @@ body: |
; CHECK-LABEL: name: test_insert_v4s16_v2s16_offset16
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](<2 x s16>), 16
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(<4 x s16>) = G_INSERT %0, %1, 16
@@ -1120,8 +1339,25 @@ body: |
; CHECK-LABEL: name: test_insert_v4s16_v2s16_offset32
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](<2 x s16>), 32
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(<4 x s16>) = G_INSERT %0, %1, 32
@@ -1136,9 +1372,27 @@ body: |
; CHECK-LABEL: name: test_insert_v4s16_v3s16_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[EXTRACT]](<3 x s16>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
%2:_(<3 x s16>) = G_EXTRACT %1, 0
@@ -1154,9 +1408,26 @@ body: |
; CHECK-LABEL: name: test_insert_v4s16_v3s16_offset16
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[EXTRACT]](<3 x s16>), 16
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
%2:_(<3 x s16>) = G_EXTRACT %1, 0
@@ -1172,8 +1443,24 @@ body: |
; CHECK-LABEL: name: test_insert_v4s16_s32_offset0
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(<4 x s16>) = G_INSERT %0, %1, 0
@@ -1188,8 +1475,25 @@ body: |
; CHECK-LABEL: name: test_insert_v4s16_s32_offset16
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](s32), 16
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(<4 x s16>) = G_INSERT %0, %1, 16
@@ -1204,8 +1508,24 @@ body: |
; CHECK-LABEL: name: test_insert_v4s16_s32_offset32
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(<4 x s16>) = G_INSERT %0, %1, 32
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
index e814e66d5d820..271b7b23380a6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
@@ -117,23 +117,20 @@ define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32
; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
- ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
- ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]]
- ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
+ ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
+ ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]]
- ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
+ ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+ ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; PACKED-LABEL: name: image_load_v3f16
; PACKED: bb.1 (%ir-block.0):
@@ -153,19 +150,23 @@ define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32
; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
- ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
- ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+ ; PACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret <3 x half> %tex
@@ -363,23 +364,20 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s,
; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
- ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]]
- ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
+ ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
+ ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]]
- ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
+ ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+ ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; PACKED-LABEL: name: image_load_tfe_v3f16
; PACKED: bb.1 (%ir-block.0):
@@ -402,20 +400,24 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s,
; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
- ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>)
- ; PACKED-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>)
+ ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; PACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
- ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+ ; PACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; PACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+ ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <3 x half>, i32 } %res, 0
@@ -607,23 +609,19 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc,
; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
- ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
- ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]]
- ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
+ ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
+ ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
- ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+ ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; PACKED-LABEL: name: image_load_v3f16_dmask_1100
; PACKED: bb.1 (%ir-block.0):
@@ -642,19 +640,20 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc,
; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
- ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
- ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
+ ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
+ ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
; PACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
@@ -679,22 +678,17 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc,
; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
- ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C1]]
- ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C]]
+ ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
- ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+ ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
+ ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; PACKED-LABEL: name: image_load_v3f16_dmask_1000
; PACKED: bb.1 (%ir-block.0):
@@ -713,19 +707,20 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc,
; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
- ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
- ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
+ ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
+ ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
; PACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
@@ -740,27 +735,24 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc,
; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNPACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; UNPACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; UNPACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
- ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+ ; UNPACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; PACKED-LABEL: name: image_load_v3f16_dmask_0000
; PACKED: bb.1 (%ir-block.0):
@@ -769,27 +761,24 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc,
; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; PACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; PACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; PACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
- ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+ ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret <3 x half> %tex
@@ -1187,23 +1176,19 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs
; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
- ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]]
- ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
+ ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
+ ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
- ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+ ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100
; PACKED: bb.1 (%ir-block.0):
@@ -1225,20 +1210,21 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs
; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
- ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; PACKED-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
- ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
+ ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
@@ -1269,22 +1255,17 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs
; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
- ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
+ ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
- ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+ ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
+ ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000
; PACKED: bb.1 (%ir-block.0):
@@ -1306,20 +1287,21 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs
; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
- ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; PACKED-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
- ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
+ ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
@@ -1350,22 +1332,17 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs
; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
- ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
+ ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
- ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+ ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]]
+ ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000
; PACKED: bb.1 (%ir-block.0):
@@ -1387,20 +1364,21 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs
; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
- ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; PACKED-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
- ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
+ ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll
index 1d84858080dac..862d88de3b5c4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=UNPACKED %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX81 %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -o - %s | FileCheck -check-prefix=UNPACKED %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX81 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s
define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) {
; UNPACKED-LABEL: name: image_store_f16
@@ -195,14 +195,12 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t,
; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
- ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
- ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96)
+ ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+ ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>)
; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[UV1]](s32)
+ ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8)
; UNPACKED-NEXT: S_ENDPGM 0
; GFX81-LABEL: name: image_store_v3f16
@@ -222,29 +220,27 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t,
; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; GFX81-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX81-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX81-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX81-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96)
+ ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
; GFX81-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX81-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+ ; GFX81-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX81-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>)
; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX81-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX81-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]]
+ ; GFX81-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; GFX81-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; GFX81-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; GFX81-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; GFX81-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX81-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]]
+ ; GFX81-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX81-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
; GFX81-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX81-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; GFX81-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; GFX81-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX81-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; GFX81-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
- ; GFX81-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX81-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
- ; GFX81-NEXT: [[BITCAST4:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>)
- ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST4]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8)
+ ; GFX81-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX81-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; GFX81-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
+ ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST5]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8)
; GFX81-NEXT: S_ENDPGM 0
; GFX9-LABEL: name: image_store_v3f16
; GFX9: bb.1 (%ir-block.0):
@@ -263,19 +259,16 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t,
; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32)
- ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8)
; GFX9-NEXT: S_ENDPGM 0
; GFX10-LABEL: name: image_store_v3f16
; GFX10: bb.1 (%ir-block.0):
@@ -294,19 +287,16 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t,
; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96)
+ ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
- ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32)
- ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32)
- ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-NEXT: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+ ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>)
; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8)
+ ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
+ ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8)
; GFX10-NEXT: S_ENDPGM 0
call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir
index 97d0d92cbe3ac..27b6aaed74a30 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir
@@ -30,10 +30,9 @@ body: |
; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
- ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>)
- ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>)
- ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](<3 x s32>)
+ ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = G_CONSTANT i32 0
%2:_(<3 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
@@ -51,10 +50,9 @@ body: |
; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
- ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>)
- ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>)
- ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[UV]](<3 x s32>)
+ ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+ ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
; GCN-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<3 x p3>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = G_CONSTANT i32 0
@@ -73,10 +71,9 @@ body: |
; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
- ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>)
- ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>)
- ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[UV]](<3 x s32>)
+ ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+ ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
; GCN-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<6 x s16>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = G_CONSTANT i32 0
@@ -95,10 +92,9 @@ body: |
; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4)
- ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<24 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>), [[DEF]](<8 x s32>), [[DEF]](<8 x s32>)
- ; GCN-NEXT: [[UV:%[0-9]+]]:_(<6 x s32>), [[UV1:%[0-9]+]]:_(<6 x s32>), [[UV2:%[0-9]+]]:_(<6 x s32>), [[UV3:%[0-9]+]]:_(<6 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<24 x s32>)
- ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](<6 x s32>)
+ ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>)
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s32>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = G_CONSTANT i32 0
%2:_(<6 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
@@ -116,10 +112,9 @@ body: |
; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s64>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4)
- ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s64>), [[DEF]](<4 x s64>), [[DEF]](<4 x s64>)
- ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>)
- ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](<3 x s64>)
+ ; GCN-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s64>)
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64)
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s64>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = G_CONSTANT i32 0
%2:_(<3 x s64>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
@@ -137,24 +132,21 @@ body: |
; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
- ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>)
- ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>)
- ; GCN-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<3 x s32>)
+ ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32)
+ ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GCN-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32)
+ ; GCN-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; GCN-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32)
- ; GCN-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32)
- ; GCN-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32)
- ; GCN-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32)
- ; GCN-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32)
- ; GCN-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32)
- ; GCN-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32)
+ ; GCN-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32)
+ ; GCN-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
+ ; GCN-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
+ ; GCN-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32)
+ ; GCN-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
+ ; GCN-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
+ ; GCN-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32)
; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C4]]
+ ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C4]]
; GCN-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
@@ -164,7 +156,7 @@ body: |
; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GCN-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C4]]
+ ; GCN-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C4]]
; GCN-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
; GCN-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
; GCN-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
@@ -174,7 +166,7 @@ body: |
; GCN-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32)
; GCN-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
; GCN-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; GCN-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C4]]
+ ; GCN-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C4]]
; GCN-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]]
; GCN-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32)
; GCN-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]]
@@ -184,8 +176,8 @@ body: |
; GCN-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32)
; GCN-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]]
; GCN-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; GCN-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
- ; GCN-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS1]](<12 x s16>)
+ ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<12 x s16>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = G_CONSTANT i32 0
%2:_(<12 x s8>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
@@ -204,10 +196,9 @@ body: |
; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
- ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>)
- ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>)
- ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](<3 x s32>)
+ ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = G_CONSTANT i32 0
%2:_(<3 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
index a95c47eb578c9..067e66444ac67 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
@@ -1160,16 +1160,10 @@ body: |
; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4)
; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
; CI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
- ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
- ; CI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
- ; CI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
- ; CI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
- ; CI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
- ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>)
+ ; CI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+ ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
; VI-LABEL: name: test_load_constant_s224_align4
; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
@@ -1179,16 +1173,10 @@ body: |
; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4)
; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
- ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
- ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
- ; VI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
- ; VI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
; GFX9-LABEL: name: test_load_constant_s224_align4
; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
@@ -1198,16 +1186,10 @@ body: |
; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
- ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
- ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 4)
@@ -3364,12 +3346,12 @@ body: |
; CI-LABEL: name: test_load_constant_v3s16_align8
; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -3395,12 +3377,12 @@ body: |
; VI-LABEL: name: test_load_constant_v3s16_align8
; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -3426,12 +3408,12 @@ body: |
; GFX9-LABEL: name: test_load_constant_v3s16_align8
; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -4935,24 +4917,27 @@ body: |
; CI-LABEL: name: test_load_constant_v3s64_align32
; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>)
; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
- ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; CI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
+ ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; VI-LABEL: name: test_load_constant_v3s64_align32
; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>)
; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
- ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
+ ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX9-LABEL: name: test_load_constant_v3s64_align32
; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 4)
%2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -4973,10 +4958,10 @@ body: |
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4)
; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; CI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; VI-LABEL: name: test_load_constant_v3s64_align8
; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4)
@@ -4984,10 +4969,10 @@ body: |
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4)
; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX9-LABEL: name: test_load_constant_v3s64_align8
; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4)
@@ -4995,10 +4980,10 @@ body: |
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 4)
%2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -5119,10 +5104,10 @@ body: |
; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; CI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64)
; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; VI-LABEL: name: test_load_constant_v3s64_align1
; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4)
@@ -5230,10 +5215,10 @@ body: |
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64)
; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX9-LABEL: name: test_load_constant_v3s64_align1
; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4)
@@ -5341,10 +5326,10 @@ body: |
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 4)
%2:_(<4 x s64>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
index e08cfd5ffa096..7b047ddd27e33 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
@@ -1010,16 +1010,10 @@ body: |
; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4)
; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
; CI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
- ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
- ; CI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
- ; CI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
- ; CI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
- ; CI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
- ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>)
+ ; CI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+ ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
; VI-LABEL: name: test_load_flat_s224_align4
; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
@@ -1029,16 +1023,10 @@ body: |
; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4)
; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
- ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
- ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
- ; VI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
- ; VI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
; GFX9-LABEL: name: test_load_flat_s224_align4
; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
@@ -1048,16 +1036,10 @@ body: |
; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
- ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
- ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 0)
@@ -3510,12 +3492,12 @@ body: |
; CI-LABEL: name: test_load_flat_v3s16_align8
; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -3541,12 +3523,12 @@ body: |
; VI-LABEL: name: test_load_flat_v3s16_align8
; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -3572,12 +3554,12 @@ body: |
; GFX9-LABEL: name: test_load_flat_v3s16_align8
; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -4944,10 +4926,10 @@ body: |
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; CI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; VI-LABEL: name: test_load_flat_v3s64_align32
; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32)
@@ -4955,10 +4937,10 @@ body: |
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX9-LABEL: name: test_load_flat_v3s64_align32
; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32)
@@ -4966,10 +4948,10 @@ body: |
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 0)
%2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -4990,10 +4972,10 @@ body: |
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; CI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; VI-LABEL: name: test_load_flat_v3s64_align8
; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
@@ -5001,10 +4983,10 @@ body: |
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX9-LABEL: name: test_load_flat_v3s64_align8
; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8)
@@ -5012,10 +4994,10 @@ body: |
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 0)
%2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -5136,10 +5118,10 @@ body: |
; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; CI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64)
; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; VI-LABEL: name: test_load_flat_v3s64_align1
; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
@@ -5247,10 +5229,10 @@ body: |
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64)
; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX9-LABEL: name: test_load_flat_v3s64_align1
; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
@@ -5358,10 +5340,10 @@ body: |
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 0)
%2:_(<4 x s64>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index 70538d0097a74..acaf1dab33f8f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -1203,8 +1203,9 @@ body: |
; SI-LABEL: name: test_load_global_s96_align16
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96)
; CI-HSA-LABEL: name: test_load_global_s96_align16
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -1775,16 +1776,10 @@ body: |
; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32) from unknown-address + 24, addrspace 1)
; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[UV4]](s32)
- ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV]](s32), [[UV1]](s32)
- ; SI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD2]](s32), [[DEF]](s32), [[DEF]](s32)
- ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
- ; SI-NEXT: [[UV6:%[0-9]+]]:_(<7 x s32>), [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV6]](<7 x s32>)
- ; SI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
- ; SI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
; CI-HSA-LABEL: name: test_load_global_s224_align4
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -1794,16 +1789,10 @@ body: |
; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
- ; CI-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
- ; CI-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
- ; CI-HSA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
- ; CI-HSA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
- ; CI-HSA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+ ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>)
+ ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+ ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
; CI-MESA-LABEL: name: test_load_global_s224_align4
; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -1813,16 +1802,10 @@ body: |
; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
- ; CI-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
- ; CI-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
- ; CI-MESA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
- ; CI-MESA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
- ; CI-MESA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+ ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>)
+ ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+ ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
; VI-LABEL: name: test_load_global_s224_align4
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -1832,16 +1815,10 @@ body: |
; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
- ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
- ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
- ; VI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
- ; VI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
; GFX9-HSA-LABEL: name: test_load_global_s224_align4
; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -1851,16 +1828,10 @@ body: |
; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
- ; GFX9-HSA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
- ; GFX9-HSA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
- ; GFX9-HSA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+ ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+ ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
; GFX9-MESA-LABEL: name: test_load_global_s224_align4
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -1870,16 +1841,10 @@ body: |
; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1)
; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32)
- ; GFX9-MESA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>)
- ; GFX9-MESA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>)
- ; GFX9-MESA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32)
+ ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
+ ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0
; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 1)
@@ -5837,12 +5802,12 @@ body: |
; SI-LABEL: name: test_load_global_v3s16_align8
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -5868,12 +5833,12 @@ body: |
; CI-HSA-LABEL: name: test_load_global_v3s16_align8
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -5899,12 +5864,12 @@ body: |
; CI-MESA-LABEL: name: test_load_global_v3s16_align8
; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CI-MESA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -5930,12 +5895,12 @@ body: |
; VI-LABEL: name: test_load_global_v3s16_align8
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -5961,12 +5926,12 @@ body: |
; GFX9-HSA-LABEL: name: test_load_global_v3s16_align8
; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -5979,12 +5944,12 @@ body: |
; GFX9-MESA-LABEL: name: test_load_global_v3s16_align8
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-MESA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -6996,7 +6961,6 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -7004,6 +6968,7 @@ body: |
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
@@ -7029,7 +6994,6 @@ body: |
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -7037,6 +7001,7 @@ body: |
; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
@@ -7062,7 +7027,6 @@ body: |
; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -7070,6 +7034,7 @@ body: |
; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
@@ -7095,7 +7060,6 @@ body: |
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -7103,6 +7067,7 @@ body: |
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
@@ -7128,7 +7093,6 @@ body: |
; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -7136,6 +7100,7 @@ body: |
; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; GFX9-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32)
@@ -7148,7 +7113,6 @@ body: |
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -7156,6 +7120,7 @@ body: |
; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; GFX9-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32)
@@ -7233,21 +7198,21 @@ body: |
; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7271,21 +7236,21 @@ body: |
; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7309,21 +7274,21 @@ body: |
; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7446,21 +7411,21 @@ body: |
; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7484,21 +7449,21 @@ body: |
; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7522,21 +7487,21 @@ body: |
; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7625,21 +7590,21 @@ body: |
; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7663,21 +7628,21 @@ body: |
; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7701,21 +7666,21 @@ body: |
; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7739,21 +7704,21 @@ body: |
; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7864,21 +7829,21 @@ body: |
; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C7]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C7]]
- ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]]
+ ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]]
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C7]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C7]]
- ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]]
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]]
+ ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]]
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C7]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]]
+ ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]]
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7902,21 +7867,21 @@ body: |
; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]]
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
+ ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
+ ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]]
- ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]]
- ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
+ ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
+ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
+ ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -7962,21 +7927,21 @@ body: |
; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C7]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C7]]
- ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]]
+ ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]]
+ ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]]
; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C7]]
- ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C7]]
- ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]]
+ ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]]
+ ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]]
; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
- ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C7]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]]
+ ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]]
; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -8022,21 +7987,21 @@ body: |
; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C7]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C7]]
- ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]]
+ ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]]
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C7]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C7]]
- ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]]
+ ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]]
+ ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]]
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C7]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]]
+ ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]]
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -8132,8 +8097,9 @@ body: |
; SI-LABEL: name: test_load_global_v6s16_align16
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[EXTRACT]](<3 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>)
; CI-HSA-LABEL: name: test_load_global_v6s16_align16
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -8621,7 +8587,6 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -8631,6 +8596,7 @@ body: |
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
@@ -8662,7 +8628,6 @@ body: |
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -8672,6 +8637,7 @@ body: |
; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; CI-HSA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
@@ -8703,7 +8669,6 @@ body: |
; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -8713,6 +8678,7 @@ body: |
; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; CI-MESA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
@@ -8744,7 +8710,6 @@ body: |
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -8754,6 +8719,7 @@ body: |
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
@@ -8785,7 +8751,6 @@ body: |
; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -8795,6 +8760,7 @@ body: |
; GFX9-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
; GFX9-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; GFX9-HSA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32)
@@ -8809,7 +8775,6 @@ body: |
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -8819,6 +8784,7 @@ body: |
; GFX9-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
; GFX9-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; GFX9-MESA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32)
@@ -8871,26 +8837,26 @@ body: |
; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+ ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+ ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+ ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -8921,26 +8887,26 @@ body: |
; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+ ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+ ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
- ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
- ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+ ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+ ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
- ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+ ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -8971,26 +8937,26 @@ body: |
; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
- ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+ ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+ ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
- ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
- ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+ ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+ ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
- ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+ ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
- ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+ ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -9021,26 +8987,26 @@ body: |
; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+ ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+ ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+ ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
- ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+ ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -9153,26 +9119,26 @@ body: |
; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+ ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+ ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+ ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -9203,26 +9169,26 @@ body: |
; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+ ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+ ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
- ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
- ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+ ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+ ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
- ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+ ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -9253,26 +9219,26 @@ body: |
; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
- ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+ ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+ ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
- ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
- ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+ ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+ ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
- ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+ ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
- ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+ ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -9303,26 +9269,26 @@ body: |
; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+ ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+ ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+ ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
- ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+ ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -9435,26 +9401,26 @@ body: |
; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+ ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+ ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+ ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -9485,26 +9451,26 @@ body: |
; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+ ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+ ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
- ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
- ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+ ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+ ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
- ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+ ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -9535,26 +9501,26 @@ body: |
; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
- ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+ ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+ ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
- ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
- ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+ ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+ ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
- ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+ ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
- ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+ ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -9585,26 +9551,26 @@ body: |
; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+ ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+ ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+ ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
- ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+ ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -9747,26 +9713,26 @@ body: |
; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C9]]
- ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32)
+ ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]]
+ ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32)
; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]]
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C9]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C9]]
- ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]]
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]]
+ ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32)
; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]]
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C9]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C9]]
- ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32)
+ ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]]
+ ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]]
+ ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32)
; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]]
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C9]]
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C9]]
- ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+ ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]]
+ ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]]
+ ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32)
; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]]
; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -9797,26 +9763,26 @@ body: |
; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]]
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
+ ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
+ ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]]
- ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]]
- ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
+ ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
+ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
+ ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]]
- ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]]
- ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
+ ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
+ ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -9877,26 +9843,26 @@ body: |
; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C9]]
- ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32)
+ ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]]
+ ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]]
+ ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32)
; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]]
; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
- ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C9]]
- ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C9]]
- ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+ ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]]
+ ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]]
+ ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32)
; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]]
; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
- ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C9]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C9]]
- ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32)
+ ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]]
+ ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]]
+ ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32)
; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]]
; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
- ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C9]]
- ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C9]]
- ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+ ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]]
+ ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]]
+ ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32)
; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]]
; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -9957,26 +9923,26 @@ body: |
; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C9]]
- ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32)
+ ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]]
+ ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32)
; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]]
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C9]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C9]]
- ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]]
+ ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]]
+ ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32)
; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]]
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C9]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C9]]
- ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32)
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]]
+ ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]]
+ ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32)
; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]]
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
- ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C9]]
- ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C9]]
- ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
+ ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]]
+ ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]]
+ ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32)
; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]]
; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
@@ -10504,8 +10470,9 @@ body: |
; SI-LABEL: name: test_load_global_v3s32_align16
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
- ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](<3 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+ ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
; CI-HSA-LABEL: name: test_load_global_v3s32_align16
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1)
@@ -11453,45 +11420,51 @@ body: |
; SI-LABEL: name: test_load_global_v3s64_align32
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>)
; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
- ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
+ ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; CI-HSA-LABEL: name: test_load_global_v3s64_align32
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
- ; CI-HSA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+ ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>)
; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
- ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
+ ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; CI-MESA-LABEL: name: test_load_global_v3s64_align32
; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
- ; CI-MESA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+ ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>)
; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
- ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
+ ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; VI-LABEL: name: test_load_global_v3s64_align32
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>)
; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
- ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
+ ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX9-HSA-LABEL: name: test_load_global_v3s64_align32
; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
- ; GFX9-HSA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+ ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>)
; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
- ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
+ ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX9-MESA-LABEL: name: test_load_global_v3s64_align32
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
- ; GFX9-MESA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+ ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>)
; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
- ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
+ ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 1)
%2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -11512,10 +11485,10 @@ body: |
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; CI-HSA-LABEL: name: test_load_global_v3s64_align8
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
@@ -11523,10 +11496,10 @@ body: |
; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; CI-MESA-LABEL: name: test_load_global_v3s64_align8
; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
@@ -11534,10 +11507,10 @@ body: |
; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; CI-MESA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; VI-LABEL: name: test_load_global_v3s64_align8
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
@@ -11545,10 +11518,10 @@ body: |
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX9-HSA-LABEL: name: test_load_global_v3s64_align8
; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
@@ -11556,10 +11529,10 @@ body: |
; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX9-MESA-LABEL: name: test_load_global_v3s64_align8
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1)
@@ -11567,10 +11540,10 @@ body: |
; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX9-MESA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 1)
%2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -11691,10 +11664,10 @@ body: |
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; SI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; SI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64)
; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; CI-HSA-LABEL: name: test_load_global_v3s64_align1
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1)
@@ -11702,10 +11675,10 @@ body: |
; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1)
; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; CI-MESA-LABEL: name: test_load_global_v3s64_align1
; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -11813,10 +11786,10 @@ body: |
; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; CI-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; CI-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64)
; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; VI-LABEL: name: test_load_global_v3s64_align1
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -11924,10 +11897,10 @@ body: |
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64)
; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX9-HSA-LABEL: name: test_load_global_v3s64_align1
; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1)
@@ -11935,10 +11908,10 @@ body: |
; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1)
; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX9-MESA-LABEL: name: test_load_global_v3s64_align1
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -12046,10 +12019,10 @@ body: |
; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; GFX9-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64)
; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 1)
%2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -14947,17 +14920,18 @@ body: |
; SI-LABEL: name: test_global_v2s96_align16
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 1)
; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32) from unknown-address + 20, addrspace 1)
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
+ ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[LOAD2]](s32)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>)
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96)
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
index 17f6b68307ea5..46eabff12de2f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -7733,12 +7733,12 @@ body: |
; SI-LABEL: name: test_load_local_v3s16_align8
; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -7764,12 +7764,12 @@ body: |
; CI-LABEL: name: test_load_local_v3s16_align8
; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -7795,12 +7795,12 @@ body: |
; CI-DS128-LABEL: name: test_load_local_v3s16_align8
; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
- ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; CI-DS128-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -7826,12 +7826,12 @@ body: |
; VI-LABEL: name: test_load_local_v3s16_align8
; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -7857,12 +7857,12 @@ body: |
; GFX9-LABEL: name: test_load_local_v3s16_align8
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -7875,12 +7875,12 @@ body: |
; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align8
; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
- ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; GFX9-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; GFX9-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -7893,12 +7893,12 @@ body: |
; GFX10-LABEL: name: test_load_local_v3s16_align8
; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -7911,12 +7911,12 @@ body: |
; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align8
; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
- ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; GFX10-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -11741,10 +11741,10 @@ body: |
; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64)
; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[UV3]](s64)
+ ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; CI-LABEL: name: test_load_local_v3s64_align32
; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3)
@@ -11754,10 +11754,10 @@ body: |
; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64)
; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[UV3]](s64)
+ ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; CI-DS128-LABEL: name: test_load_local_v3s64_align32
; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
@@ -11765,10 +11765,10 @@ body: |
; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-DS128-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; VI-LABEL: name: test_load_local_v3s64_align32
; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
@@ -11776,10 +11776,10 @@ body: |
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX9-LABEL: name: test_load_local_v3s64_align32
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
@@ -11787,10 +11787,10 @@ body: |
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s64_align32
; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
@@ -11798,10 +11798,10 @@ body: |
; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-UNALIGNED-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX9-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX10-LABEL: name: test_load_local_v3s64_align32
; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
@@ -11809,10 +11809,10 @@ body: |
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s64_align32
; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3)
@@ -11820,10 +11820,10 @@ body: |
; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64)
; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX10-UNALIGNED-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p3) = COPY $vgpr0
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 3)
%2:_(<4 x s64>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
index c13629c8dc03e..cca3ae38e94ef 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
@@ -7496,10 +7496,10 @@ body: |
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
; SI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; CI-LABEL: name: test_load_private_v3s64_align32
; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
@@ -7519,10 +7519,10 @@ body: |
; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; VI-LABEL: name: test_load_private_v3s64_align32
; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
@@ -7542,10 +7542,10 @@ body: |
; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX9-LABEL: name: test_load_private_v3s64_align32
; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5)
@@ -7565,10 +7565,10 @@ body: |
; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p5) = COPY $vgpr0
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 5)
%2:_(<4 x s64>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir
index f61de3cc898c9..cf4144166b51a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir
@@ -1,8 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=SI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=VI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
---
name: test_lshr_s32_s32
@@ -414,43 +414,40 @@ body: |
; SI-LABEL: name: test_lshr_v3s64_v3s32
; SI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>)
- ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV3]](s32)
- ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV4]](s32)
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV5]](s32)
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64)
+ ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32)
+ ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32)
+ ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32)
; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64)
+ ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; VI-LABEL: name: test_lshr_v3s64_v3s32
; VI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>)
- ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV3]](s32)
- ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV4]](s32)
- ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV5]](s32)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64)
+ ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32)
+ ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32)
+ ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32)
; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64)
+ ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX9-LABEL: name: test_lshr_v3s64_v3s32
; GFX9: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>)
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV3]](s32)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV4]](s32)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV5]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64)
+ ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64)
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
%1:_(<3 x s64>) = G_EXTRACT %0, 0
%2:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10
@@ -593,17 +590,12 @@ body: |
; SI-LABEL: name: test_lshr_v3s16_v3s16
; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0
- ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+ ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
@@ -617,29 +609,26 @@ body: |
; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+ ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]]
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]]
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]]
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL1]]
- ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF1]](<2 x s16>)
- ; SI-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; SI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0
- ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+ ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
; VI-LABEL: name: test_lshr_v3s16_v3s16
; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -647,8 +636,7 @@ body: |
; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; VI-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0
- ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+ ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -658,49 +646,52 @@ body: |
; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC3]](s16)
; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC4]](s16)
; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC5]](s16)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+ ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR2]](s16)
; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR3]](s16)
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]]
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF1]](<2 x s16>)
- ; VI-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; VI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0
- ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+ ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
; GFX9-LABEL: name: test_lshr_v3s16_v3s16
; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32
- ; GFX9-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0
- ; GFX9-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[EXTRACT2]], [[EXTRACT4]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT3]], [[EXTRACT5]](s16)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR2]](s32)
- ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[DEF1]](s32)
- ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF2]](<2 x s16>)
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX9-NEXT: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR4]](s32)
+ ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[LSHR5]](s32)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
%2:_(<3 x s16>) = G_EXTRACT %0, 0
@@ -807,32 +798,33 @@ body: |
; GFX9-LABEL: name: test_ashr_v3s16_v3s16
; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32
- ; GFX9-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0
- ; GFX9-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[EXTRACT]], [[EXTRACT2]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT1]], [[EXTRACT3]](s16)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR2]](s32)
- ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST2]](s32)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR4]](s32)
+ ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST5]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[BITCAST6]](s32)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
index b376bff0ca958..60cdfdd685b11 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -allow-ginsert-as-artifact=0 -global-isel-abort=0 %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s
---
name: test_or_s32
@@ -305,18 +305,14 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>), [[DEF1]](<2 x s32>)
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<3 x s32>), [[UV7:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV6]](<3 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV5]]
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[UV7]](s32), [[OR1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
%2:_(<3 x s32>) = G_OR %0, %1
@@ -353,23 +349,20 @@ body: |
; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32)
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR4]]
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR2]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>), [[OR2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>)
- ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<5 x s32>), [[UV11:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s32>)
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[UV10]](<5 x s32>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV4]], [[UV9]]
+ ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>)
+ ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR1]](<2 x s32>)
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[OR2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>)
%0:_(<5 x s32>) = G_IMPLICIT_DEF
%1:_(<5 x s32>) = G_IMPLICIT_DEF
%2:_(<5 x s32>) = G_OR %0, %1
@@ -424,40 +417,65 @@ body: |
; CHECK-LABEL: name: test_or_v3s16
; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT]], [[INSERT1]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
+ ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR4]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C1]]
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]]
+ ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C1]]
+ ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]]
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]]
+ ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
+ ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]]
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32)
+ ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]]
+ ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
%2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0
@@ -493,54 +511,102 @@ body: |
; CHECK-LABEL: name: test_or_v5s16
; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0
- ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>)
- ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>)
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0
- ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT2]], [[INSERT3]]
- ; CHECK-NEXT: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0
- ; CHECK-NEXT: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT4]], [[INSERT5]]
- ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
- ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR1]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
- ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
- ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>), [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>)
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV20]](<5 x s16>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
+ ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR4]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]]
+ ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL5]]
+ ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>)
+ ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
+ ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+ ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+ ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32)
+ ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL7]]
+ ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>)
+ ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]]
+ ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR9]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32)
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+ ; CHECK-NEXT: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST18:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST18]], [[C]](s32)
+ ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]]
+ ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]]
+ ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32)
+ ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL8]]
+ ; CHECK-NEXT: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+ ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]]
+ ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]]
+ ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+ ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+ ; CHECK-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+ ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]]
+ ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]]
+ ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32)
+ ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL10]]
+ ; CHECK-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32)
+ ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[BITCAST18]], [[C1]]
+ ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]]
+ ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
+ ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL11]]
+ ; CHECK-NEXT: [[BITCAST22:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST19]](<2 x s16>), [[BITCAST20]](<2 x s16>), [[BITCAST21]](<2 x s16>), [[BITCAST22]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>)
%0:_(<5 x s16>) = G_IMPLICIT_DEF
%1:_(<5 x s16>) = G_IMPLICIT_DEF
%2:_(<5 x s16>) = G_OR %0, %1
@@ -555,34 +621,15 @@ body: |
bb.0:
; CHECK-LABEL: name: test_or_v3s8
- ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF1]](<3 x s8>), 0
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>)
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
- ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]]
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[OR]](s32)
- ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
- ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]]
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[OR1]](s32)
- ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
- ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT4]], [[ANYEXT5]]
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[OR2]](s32)
- ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
- ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT6]], [[ANYEXT7]]
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR3]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>)
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>)
- ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV]], [[UV4]]
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV1]], [[UV5]]
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV6]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s8>) = G_IMPLICIT_DEF
%1:_(<3 x s8>) = G_IMPLICIT_DEF
%2:_(<3 x s8>) = G_OR %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
index 7b867c833e1fd..5be7981d5c722 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
@@ -127,77 +127,77 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1
- ; CHECK-NEXT: G_BR %bb.2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.2(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]]
- ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]]
- ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C2]]
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF2]](<2 x s16>)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[INSERT]](<4 x s16>), %bb.0, [[INSERT3]](<4 x s16>), %bb.1
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[PHI]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST]]
+ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR]]
+ ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST1]]
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C3]]
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C3]]
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32)
- ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C4]](s32)
- ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C5]]
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]]
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
- ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C5]]
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C5]]
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C4]](s32)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C3]]
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C4]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
- ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C5]]
- ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C5]]
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C4]](s32)
- ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
- ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+ ; CHECK-NEXT: G_BR %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[CONCAT_VECTORS]](<4 x s16>), %bb.0, [[CONCAT_VECTORS1]](<4 x s16>), %bb.1
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[PHI]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C6]](s32)
+ ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C6]](s32)
+ ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C7]]
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C7]]
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]]
+ ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C7]]
+ ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C7]]
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C6]](s32)
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]]
+ ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C7]]
+ ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C7]]
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]]
+ ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31
bb.0:
successors: %bb.1, %bb.2
@@ -252,10 +252,9 @@ body: |
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]]
; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR2]]
@@ -935,38 +934,39 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64)
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>)
- ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
- ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV6]], [[UV8]]
- ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV7]], [[UV9]], [[UADDO1]]
+ ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]]
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]]
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
+ ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
- ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV4]](s64)
- ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV10]], [[UV12]]
- ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV11]], [[UV13]], [[UADDO3]]
+ ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]]
+ ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]]
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32)
+ ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
- ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV5]](s64)
- ; CHECK-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV14]], [[UV16]]
- ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV15]], [[UV17]], [[UADDO5]]
+ ; CHECK-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]]
+ ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]]
; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<3 x s64>) = G_PHI [[EXTRACT]](<3 x s64>), %bb.0, [[BUILD_VECTOR]](<3 x s64>), %bb.1
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<3 x s64>) = G_PHI [[BUILD_VECTOR]](<3 x s64>), %bb.0, [[BUILD_VECTOR1]](<3 x s64>), %bb.1
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[PHI]](<3 x s64>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s64), [[UV17:%[0-9]+]]:_(s64), [[UV18:%[0-9]+]]:_(s64), [[UV19:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s64), [[UV21:%[0-9]+]]:_(s64), [[UV22:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[PHI]](<3 x s64>)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV20]](s64), [[UV21]](s64), [[UV22]](s64), [[UV19]](s64)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR2]](<4 x s64>)
; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31
bb.0:
successors: %bb.1, %bb.2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
index 7b0547e8b03b2..e39849ad6dd7e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
@@ -424,11 +424,9 @@ body: |
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32)
; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32)
; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
@@ -443,7 +441,7 @@ body: |
; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]]
; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32)
; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
- ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32)
+ ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32)
; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]]
; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX2]]
; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]]
@@ -463,27 +461,27 @@ body: |
; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[SMIN5]]
; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ADD2]], [[C]](s32)
; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]]
; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]]
; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]]
- ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]]
- ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
+ ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]]
; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]]
- ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
- ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]]
+ ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]]
- ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX8-LABEL: name: saddsat_v3s16
; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -495,13 +493,11 @@ body: |
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
@@ -528,27 +524,27 @@ body: |
; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB4]]
; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[SMIN5]]
; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16)
; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16)
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16)
; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
+ ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]]
; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
- ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]]
+ ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
- ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX9-LABEL: name: saddsat_v3s16
; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -557,29 +553,27 @@ body: |
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]]
; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]]
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT1]](<2 x s16>)
; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT1]](<2 x s16>)
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32)
+ ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST6]](s32)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
index cac4b9b9864ea..fa1488e87ffa4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
@@ -260,35 +260,23 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY2]](<3 x s32>)
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s8>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[TRUNC1]](<3 x s8>), 0
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>)
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
- ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8)
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT]], [[ANYEXT1]]
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT]](s16)
- ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8)
- ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8)
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT2]], [[ANYEXT3]]
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT1]](s16)
- ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8)
- ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8)
- ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT4]], [[ANYEXT5]]
- ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT2]](s16)
- ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8)
- ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8)
- ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT6]], [[ANYEXT7]]
- ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT3]](s16)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>)
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>)
- ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32)
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC2]], [[TRUNC3]]
+ ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+ ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32)
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC4]], [[TRUNC5]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16)
+ ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = G_CONSTANT i32 0
%2:_(<3 x s32>) = COPY $vgpr1_vgpr2_vgpr3
@@ -384,40 +372,65 @@ body: |
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr6
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[C]]
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<4 x s16>) = G_SELECT [[ICMP]](s1), [[INSERT]], [[INSERT1]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[SELECT]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C1]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32)
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]]
; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<4 x s16>) = G_SELECT [[ICMP]](s1), [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[SELECT]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32)
+ ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32)
+ ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C2]]
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C2]]
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]]
+ ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]]
+ ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C2]]
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]]
+ ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]]
+ ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C2]]
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]]
+ ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
%2:_(s32) = COPY $vgpr6
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir
index 46a4616d19330..5529b114ac7bb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir
@@ -705,10 +705,10 @@ body: |
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir
index 09064e3dec7dc..088d586cae03c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir
@@ -109,10 +109,7 @@ body: |
; CHECK-LABEL: name: test_sext_v3s16_to_v3s32
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir
index b406ed427ab03..a1a14756cf6f5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir
@@ -1,8 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
---
name: test_shl_s32_s32
@@ -405,43 +405,40 @@ body: |
; SI-LABEL: name: test_shl_v3s64_v3s32
; SI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>)
- ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV3]](s32)
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV4]](s32)
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV5]](s32)
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64)
+ ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32)
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32)
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32)
; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64)
+ ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; VI-LABEL: name: test_shl_v3s64_v3s32
; VI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>)
- ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV3]](s32)
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV4]](s32)
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV5]](s32)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64)
+ ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32)
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32)
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32)
; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64)
+ ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX9-LABEL: name: test_shl_v3s64_v3s32
; GFX9: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>)
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
- ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV3]](s32)
- ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV4]](s32)
- ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV5]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64)
+ ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32)
+ ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32)
+ ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64)
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
%1:_(<3 x s64>) = G_EXTRACT %0, 0
%2:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10
@@ -664,32 +661,33 @@ body: |
; GFX9-LABEL: name: test_shl_v3s16_v3s16
; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32
- ; GFX9-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0
- ; GFX9-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
- ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[EXTRACT]], [[EXTRACT2]](<2 x s16>)
- ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[EXTRACT1]], [[EXTRACT3]](s16)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32)
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST5]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST6]](s32)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir
index 8dc47c4e362d1..8bbde77b42697 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir
@@ -35,9 +35,11 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0
- ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
@@ -59,9 +61,11 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32
- ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
@@ -83,9 +87,11 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0
- ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
@@ -131,8 +137,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[EXTRACT]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[COPY2]](s32)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
@@ -154,9 +161,10 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[DEF]](s32)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
@@ -178,10 +186,13 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 0
- ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64
- ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 32
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32), [[EXTRACT2]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV5]](s32)
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV7]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
@@ -203,9 +214,11 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64
- ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 32
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
@@ -293,50 +306,29 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
- ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
- ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 64
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR1]](s32), [[BITCAST3]](s32)
- ; CHECK-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 32
- ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32)
- ; CHECK-NEXT: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR2]](<3 x s32>), 0
- ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST6]](s32), [[LSHR3]](s32), [[BITCAST7]](s32)
- ; CHECK-NEXT: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR3]](<3 x s32>), 0
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[EXTRACT2]], [[C1]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT3]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[EXTRACT4]], [[C1]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[EXTRACT5]], [[C1]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir
index e5a21d494c4c6..faff7290f6d55 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir
@@ -862,66 +862,48 @@ body: |
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
- ; GFX8-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
- ; GFX8-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0
- ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0
- ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
- ; GFX8-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 64
- ; GFX8-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
- ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR1]](s32), [[BITCAST3]](s32)
- ; GFX8-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 32
- ; GFX8-NEXT: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 0
- ; GFX8-NEXT: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 0
+ ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+ ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[EXTRACT2]], [[C1]]
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT3]], [[C1]]
+ ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+ ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[EXTRACT4]], [[C1]]
- ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[EXTRACT5]], [[C1]]
+ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+ ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
; GFX9-LABEL: name: shufflevector_v4s16_v3s16_2_0
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
- ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 64
- ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR1]](s32), [[BITCAST3]](s32)
- ; GFX9-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 32
- ; GFX9-NEXT: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 0
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[EXTRACT2]](s32), [[EXTRACT3]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[EXTRACT4]](s32), [[EXTRACT5]](s32)
+ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
@@ -948,12 +930,14 @@ body: |
; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>)
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX8-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0
+ ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C1]](s32)
+ ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32)
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>)
- ; GFX8-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 0
- ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+ ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>)
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+ ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32)
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
@@ -968,12 +952,14 @@ body: |
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C1]](s32)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>)
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 0
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>)
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
@@ -997,12 +983,14 @@ body: |
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>)
- ; GFX8-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0
+ ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32)
+ ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>)
- ; GFX8-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 32
- ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+ ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>)
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+ ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32)
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
@@ -1016,12 +1004,14 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>)
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 32
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32)
+ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>)
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32)
; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir
index f336e7b5aa8f7..ce0afe0b50cbe 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir
@@ -1,8 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
---
name: test_smax_s32
@@ -327,12 +327,12 @@ body: |
; SI-LABEL: name: test_smax_v3s16
; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -350,7 +350,6 @@ body: |
; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
; VI-LABEL: name: test_smax_v3s16
; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
@@ -359,6 +358,7 @@ body: |
; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
@@ -376,23 +376,27 @@ body: |
; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
; GFX9-LABEL: name: test_smax_v3s16
; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
- ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
- ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
- ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
- ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
- ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV8]], [[UV10]]
- ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV9]], [[UV11]]
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX1]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF2]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF2]](s32)
+ ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]]
+ ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]]
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX1]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32)
; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir
index 60ae4743796cc..08df7e1697344 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir
@@ -1,8 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
---
name: test_smin_s32
@@ -327,12 +327,12 @@ body: |
; SI-LABEL: name: test_smin_v3s16
; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -350,7 +350,6 @@ body: |
; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
; VI-LABEL: name: test_smin_v3s16
; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
@@ -359,6 +358,7 @@ body: |
; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
@@ -376,23 +376,27 @@ body: |
; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
; GFX9-LABEL: name: test_smin_v3s16
; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
- ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
- ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
- ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
- ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
- ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV8]], [[UV10]]
- ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV9]], [[UV11]]
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN1]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF2]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF2]](s32)
+ ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]]
+ ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]]
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN1]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32)
; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_IMPLICIT_DEF
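For results the patch uses the inverse helper: the widened operation still
produces <4 x s16> worth of lanes, and the padded lane is dropped again
before the value is used at its original type. Roughly, assuming Wide holds
the widened result (again an illustrative fragment, not the patch's exact
code):

    // Recover the requested <3 x s16> type; the helper unmerges to
    // elements and rebuilds only the first three lanes, discarding the
    // undef pad.
    LLT V3S16 = LLT::fixed_vector(3, 16);
    auto Narrow = B.buildDeleteTrailingVectorElements(V3S16, Wide);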
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir
index 21a553538466c..ddfa0e0669c3f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir
@@ -479,11 +479,9 @@ body: |
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32)
@@ -497,7 +495,7 @@ body: |
; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]]
; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]]
; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32)
- ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32)
; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32)
@@ -516,26 +514,26 @@ body: |
; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]]
; GFX6-NEXT: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32)
; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]]
; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]]
; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]]
- ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C1]]
- ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+ ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]]
- ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
- ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+ ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]]
- ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX8-LABEL: name: sshlsat_v3s16
; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -547,13 +545,11 @@ body: |
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16)
; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC3]](s16)
@@ -577,27 +573,27 @@ body: |
; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]]
; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]]
; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16)
; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16)
; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
- ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT5]](s16)
; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
+ ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]]
; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
- ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
- ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]]
+ ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]]
- ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX9-LABEL: name: sshlsat_v3s16
; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -609,13 +605,11 @@ body: |
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16)
; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC3]](s16)
@@ -639,16 +633,16 @@ body: |
; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]]
; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]]
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16)
; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT5]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST4]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST5]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST4]](s32)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
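The savings in the sshlsat checks above come from the artifact combiner
rather than the legalizer itself: because padding and splitting are now
expressed as unmerge-to-elements followed by merge, an unmerge of a freshly
built vector folds back to the original elements, and the duplicated
G_UNMERGE_VALUES of [[COPY]] disappears. The underlying pattern, sketched
with illustrative names:

    // Build a vector from elements, then unmerge it again: the artifact
    // combiner can replace the unmerged defs with E0/E1 directly, so the
    // round trip is free.
    auto Vec = B.buildBuildVector(V2S16, {E0, E1});
    auto Parts = B.buildUnmerge(LLT::scalar(16), Vec); // folds to E0, E1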
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir
index e2516e5d79f71..d6ae62a28a932 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir
@@ -424,11 +424,9 @@ body: |
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32)
; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32)
; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
@@ -443,7 +441,7 @@ body: |
; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]]
; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32)
; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
- ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32)
+ ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32)
; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]]
; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C1]]
; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]]
@@ -463,27 +461,27 @@ body: |
; GFX6-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[SMIN5]]
; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SUB8]], [[C]](s32)
; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]]
; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]]
; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]]
- ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]]
- ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
+ ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]]
; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]]
- ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
- ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]]
+ ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]]
- ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX8-LABEL: name: ssubsat_v3s16
; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -495,13 +493,11 @@ body: |
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
@@ -528,27 +524,27 @@ body: |
; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB7]]
; GFX8-NEXT: [[SUB8:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[SMIN5]]
; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16)
; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16)
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB8]](s16)
; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
+ ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]]
; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
- ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]]
+ ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
- ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX9-LABEL: name: ssubsat_v3s16
; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -557,29 +553,27 @@ body: |
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]]
; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]]
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT1]](<2 x s16>)
; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT1]](<2 x s16>)
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32)
+ ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST6]](s32)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
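In the store legalization below, fewerElementsVector no longer needs
G_EXTRACT pairs: a <3 x s32> store is split into a <2 x s32> part plus an
s32 leftover by unmerging the source to scalars and rebuilding just the wide
part. The helper deliberately allows the leftover to be narrower than the
requested type, so no undef padding (and no extra registers) are needed. A
hedged sketch of that split, with illustrative names:

    // Split a <3 x s32> value for storing as <2 x s32> + s32.
    auto Elts = B.buildUnmerge(LLT::scalar(32), Src);   // three s32 defs
    auto Part = B.buildBuildVector(LLT::fixed_vector(2, 32),
                                   {Elts.getReg(0), Elts.getReg(1)});
    Register Leftover = Elts.getReg(2);                 // stored separately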
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
index c64f0951965d6..d4f19a54f59c5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
@@ -1,16 +1,9 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -pass-remarks-missed='gisel.*' %s 2> %t.err -o - | FileCheck -check-prefix=SI %s
-# RUN: FileCheck -check-prefix=ERR %s < %t.err
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -pass-remarks-missed='gisel.*' 2> %t.err %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: FileCheck -check-prefix=ERR %s < %t.err
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
-
-
-# ERR-NOT: remark
-# ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(<132 x s16>) = G_CONCAT_VECTORS {{.*}} (in function: test_store_global_v11s16_align4)
-# ERR-NOT: remark
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=CI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
---
name: test_store_global_s1_align1
@@ -2388,9 +2381,7 @@ body: |
; SI-LABEL: name: test_store_global_v3s32_align1
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -2429,7 +2420,7 @@ body: |
; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
- ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32)
; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
@@ -2523,9 +2514,7 @@ body: |
; SI-LABEL: name: test_store_global_v3s32_align2
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -2542,7 +2531,7 @@ body: |
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
- ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
@@ -2594,12 +2583,12 @@ body: |
; SI-LABEL: name: test_store_global_v3s32_align4
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
- ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1)
; CI-LABEL: name: test_store_global_v3s32_align4
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
@@ -2626,12 +2615,12 @@ body: |
; SI-LABEL: name: test_store_global_v3s32_align8
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
- ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
; CI-LABEL: name: test_store_global_v3s32_align8
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
@@ -2658,12 +2647,12 @@ body: |
; SI-LABEL: name: test_store_global_v3s32_align16
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
- ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
; CI-LABEL: name: test_store_global_v3s32_align16
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
@@ -4091,9 +4080,7 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>)
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -4132,7 +4119,7 @@ body: |
; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1)
; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
- ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32)
; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
@@ -4230,9 +4217,7 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>)
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -4249,7 +4234,7 @@ body: |
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1)
; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
- ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64)
; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1)
@@ -4305,12 +4290,12 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
- ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1)
; CI-LABEL: name: test_store_global_s96_align4
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
@@ -4341,12 +4326,12 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
- ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
; CI-LABEL: name: test_store_global_s96_align8
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
@@ -4377,12 +4362,12 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
- ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
; CI-LABEL: name: test_store_global_s96_align16
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
@@ -4763,9 +4748,7 @@ body: |
; SI-LABEL: name: test_store_global_v5s32_align1
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -4838,7 +4821,7 @@ body: |
; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
- ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32)
; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
@@ -4856,18 +4839,16 @@ body: |
; CI-LABEL: name: test_store_global_v5s32_align1
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
; VI-LABEL: name: test_store_global_v5s32_align1
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -4940,7 +4921,7 @@ body: |
; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
- ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32)
; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32)
@@ -4958,12 +4939,12 @@ body: |
; GFX9-LABEL: name: test_store_global_v5s32_align1
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
G_STORE %1, %0 :: (store (<5 x s32>), align 1, addrspace 1)
@@ -4978,9 +4959,7 @@ body: |
; SI-LABEL: name: test_store_global_v5s32_align2
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -5011,7 +4990,7 @@ body: |
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32)
; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
@@ -5019,18 +4998,16 @@ body: |
; CI-LABEL: name: test_store_global_v5s32_align2
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
; VI-LABEL: name: test_store_global_v5s32_align2
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -5061,7 +5038,7 @@ body: |
; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32)
; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
@@ -5069,12 +5046,12 @@ body: |
; GFX9-LABEL: name: test_store_global_v5s32_align2
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
G_STORE %1, %0 :: (store (<5 x s32>), align 2, addrspace 1)
@@ -5089,39 +5066,39 @@ body: |
; SI-LABEL: name: test_store_global_v5s32_align4
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
; CI-LABEL: name: test_store_global_v5s32_align4
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
; VI-LABEL: name: test_store_global_v5s32_align4
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
; GFX9-LABEL: name: test_store_global_v5s32_align4
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
G_STORE %1, %0 :: (store (<5 x s32>), align 4, addrspace 1)
@@ -5136,39 +5113,39 @@ body: |
; SI-LABEL: name: test_store_global_v5s32_align8
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
; CI-LABEL: name: test_store_global_v5s32_align8
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
; VI-LABEL: name: test_store_global_v5s32_align8
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
; GFX9-LABEL: name: test_store_global_v5s32_align8
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
G_STORE %1, %0 :: (store (<5 x s32>), align 8, addrspace 1)
@@ -5183,39 +5160,39 @@ body: |
; SI-LABEL: name: test_store_global_v5s32_align16
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
; CI-LABEL: name: test_store_global_v5s32_align16
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
; VI-LABEL: name: test_store_global_v5s32_align16
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
; GFX9-LABEL: name: test_store_global_v5s32_align16
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
G_STORE %1, %0 :: (store (<5 x s32>), align 16, addrspace 1)
@@ -5230,9 +5207,7 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -5305,7 +5280,7 @@ body: |
; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
- ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32)
; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
@@ -5324,19 +5299,17 @@ body: |
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
; VI-LABEL: name: test_store_global_v5p3_align1
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -5409,7 +5382,7 @@ body: |
; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
- ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32)
; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32)
@@ -5428,12 +5401,12 @@ body: |
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
G_STORE %1, %0 :: (store (<5 x p3>), align 1, addrspace 1)
@@ -5449,9 +5422,7 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -5482,7 +5453,7 @@ body: |
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32)
; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
@@ -5491,19 +5462,17 @@ body: |
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
; VI-LABEL: name: test_store_global_v5p3_align2
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -5534,7 +5503,7 @@ body: |
; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32)
; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
@@ -5543,12 +5512,12 @@ body: |
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
G_STORE %1, %0 :: (store (<5 x p3>), align 2, addrspace 1)
@@ -5564,42 +5533,42 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
; CI-LABEL: name: test_store_global_v5p3_align4
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
; VI-LABEL: name: test_store_global_v5p3_align4
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
; GFX9-LABEL: name: test_store_global_v5p3_align4
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
G_STORE %1, %0 :: (store (<5 x p3>), align 4, addrspace 1)
@@ -5615,42 +5584,42 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
; CI-LABEL: name: test_store_global_v5p3_align8
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
; VI-LABEL: name: test_store_global_v5p3_align8
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
; GFX9-LABEL: name: test_store_global_v5p3_align8
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
G_STORE %1, %0 :: (store (<5 x p3>), align 8, addrspace 1)
@@ -5666,42 +5635,42 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
; CI-LABEL: name: test_store_global_v5p3_align16
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
; VI-LABEL: name: test_store_global_v5p3_align16
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
; GFX9-LABEL: name: test_store_global_v5p3_align16
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
G_STORE %1, %0 :: (store (<5 x p3>), align 16, addrspace 1)
@@ -5717,42 +5686,42 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
; CI-LABEL: name: test_store_global_v10s16_align4
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
; VI-LABEL: name: test_store_global_v10s16_align4
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
; GFX9-LABEL: name: test_store_global_v10s16_align4
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<10 x s16>) = G_IMPLICIT_DEF
G_STORE %1, %0 :: (store (<10 x s16>), align 16, addrspace 1)
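(The odd-element <11 x s16> case below is where the old expected output degenerated: eleven G_UNMERGE_VALUES of implicit-def <12 x s16> values feeding a <132 x s16> G_CONCAT_VECTORS, an unmerge back into twelve <11 x s16> pieces, and G_INSERT/G_EXTRACT round-trips. The new checks instead repack elements directly. A compressed sketch of the per-pair repacking they expect, with hypothetical register names:

    %c16:_(s32) = G_CONSTANT i32 16
    %mask:_(s32) = G_CONSTANT i32 65535
    %lo:_(s32) = G_AND %a, %mask          ; low 16 bits of the first element
    %hiamt:_(s32) = G_AND %b, %mask
    %hi:_(s32) = G_SHL %hiamt, %c16(s32)  ; second element into the high half
    %packed:_(s32) = G_OR %lo, %hi
    %pair:_(<2 x s16>) = G_BITCAST %packed(s32)

Four such pairs feed an <8 x s16> G_CONCAT_VECTORS that is bitcast to <4 x s32> for the 16-byte store; the remaining three s16 elements are stored individually.)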
@@ -5767,159 +5736,182 @@ body: |
; SI-LABEL: name: test_store_global_v11s16_align4
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
- ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; SI-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; SI-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; SI-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; SI-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; SI-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; SI-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; SI-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; SI-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; SI-NEXT: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>)
- ; SI-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>)
- ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0
- ; SI-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>)
- ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
- ; SI-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>)
- ; SI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
- ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
- ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
- ; SI-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
+ ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+ ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+ ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
+ ; SI-NEXT: G_STORE [[BITCAST10]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; SI-NEXT: G_STORE [[BITCAST4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
+ ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
+ ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+ ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
+ ; SI-NEXT: G_STORE [[BITCAST5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
; CI-LABEL: name: test_store_global_v11s16_align4
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
- ; CI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; CI-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; CI-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; CI-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; CI-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; CI-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; CI-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; CI-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; CI-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; CI-NEXT: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>)
- ; CI-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>)
- ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0
- ; CI-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>)
- ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
- ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
- ; CI-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>)
- ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>)
- ; CI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
- ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
- ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
- ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
- ; CI-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; CI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+ ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; CI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
+ ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+ ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+ ; CI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
+ ; CI-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
+ ; CI-NEXT: G_STORE [[BITCAST10]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; CI-NEXT: G_STORE [[BITCAST4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
+ ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
+ ; CI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+ ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
+ ; CI-NEXT: G_STORE [[BITCAST5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
; VI-LABEL: name: test_store_global_v11s16_align4
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
- ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; VI-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; VI-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; VI-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; VI-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; VI-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; VI-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; VI-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; VI-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; VI-NEXT: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>)
- ; VI-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>)
- ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0
- ; VI-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>)
- ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
- ; VI-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>)
- ; VI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
- ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
- ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
- ; VI-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
+ ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+ ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
+ ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
+ ; VI-NEXT: G_STORE [[BITCAST10]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; VI-NEXT: G_STORE [[BITCAST4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
+ ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
+ ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
+ ; VI-NEXT: G_STORE [[BITCAST5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
; GFX9-LABEL: name: test_store_global_v11s16_align4
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>)
- ; GFX9-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>)
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0
- ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>)
- ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
- ; GFX9-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>)
- ; GFX9-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR2]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR3]](s32)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
+ ; GFX9-NEXT: G_STORE [[BITCAST6]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+ ; GFX9-NEXT: G_STORE [[BITCAST4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
- ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
- ; GFX9-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[BITCAST5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<11 x s16>) = G_IMPLICIT_DEF
G_STORE %1, %0 :: (store (<11 x s16>), align 16, addrspace 1)
@@ -5935,42 +5927,46 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128
- ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
; CI-LABEL: name: test_store_global_v12s16_align4
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
; VI-LABEL: name: test_store_global_v12s16_align4
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128
- ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
; GFX9-LABEL: name: test_store_global_v12s16_align4
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<12 x s16>) = G_IMPLICIT_DEF
G_STORE %1, %0 :: (store (<12 x s16>), align 16, addrspace 1)
@@ -5986,9 +5982,7 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -6061,7 +6055,7 @@ body: |
; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
- ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32)
; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
@@ -6080,19 +6074,17 @@ body: |
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
; VI-LABEL: name: test_store_global_s160_align1
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -6165,7 +6157,7 @@ body: |
; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
- ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32)
; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32)
@@ -6184,12 +6176,12 @@ body: |
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
G_STORE %1, %0 :: (store (s160), align 1, addrspace 1)
@@ -6205,9 +6197,7 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -6238,7 +6228,7 @@ body: |
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32)
; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
@@ -6247,19 +6237,17 @@ body: |
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
; VI-LABEL: name: test_store_global_s160_align2
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
@@ -6290,7 +6278,7 @@ body: |
; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32)
+ ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32)
; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1)
@@ -6299,12 +6287,12 @@ body: |
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
G_STORE %1, %0 :: (store (s160), align 2, addrspace 1)
@@ -6320,42 +6308,42 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
; CI-LABEL: name: test_store_global_s160_align4
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
; VI-LABEL: name: test_store_global_s160_align4
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
; GFX9-LABEL: name: test_store_global_s160_align4
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
G_STORE %1, %0 :: (store (s160), align 4, addrspace 1)
@@ -6371,42 +6359,42 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
; CI-LABEL: name: test_store_global_s160_align8
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
; VI-LABEL: name: test_store_global_s160_align8
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
; GFX9-LABEL: name: test_store_global_s160_align8
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
G_STORE %1, %0 :: (store (s160), align 8, addrspace 1)
@@ -6422,42 +6410,42 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
; CI-LABEL: name: test_store_global_s160_align16
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
; VI-LABEL: name: test_store_global_s160_align16
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
; GFX9-LABEL: name: test_store_global_s160_align16
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
G_STORE %1, %0 :: (store (s160), align 16, addrspace 1)
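
For readers skimming the s160 FileCheck updates above, the expected output changes the same way across SI/CI/VI/GFX9: instead of two G_EXTRACT operations at bit offsets 0 and 128, the checks now expect the <5 x s32> bitcast to be unmerged into scalars, with the first four rebuilt into the <4 x s32> operand of the 16-byte store and the fifth stored directly at offset 16. A minimal standalone sketch of that shape in MIR (register names here are illustrative, not the FileCheck bindings used in the tests):

  %ptr:_(p1) = COPY $vgpr0_vgpr1
  %val:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
  %vec:_(<5 x s32>) = G_BITCAST %val(s160)
  ; A single unmerge exposes all five elements as individual s32 values.
  %e0:_(s32), %e1:_(s32), %e2:_(s32), %e3:_(s32), %e4:_(s32) = G_UNMERGE_VALUES %vec(<5 x s32>)
  %lo:_(<4 x s32>) = G_BUILD_VECTOR %e0(s32), %e1(s32), %e2(s32), %e3(s32)
  G_STORE %lo(<4 x s32>), %ptr(p1) :: (store (<4 x s32>), addrspace 1)
  %c16:_(s64) = G_CONSTANT i64 16
  %tail:_(p1) = G_PTR_ADD %ptr, %c16(s64)
  ; The leftover fifth element is stored as a plain s32 at offset 16.
  G_STORE %e4(s32), %tail(p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1)
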
@@ -7797,11 +7785,9 @@ body: |
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
@@ -7874,7 +7860,6 @@ body: |
; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
- ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>)
; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY15]], [[C]](s32)
; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
@@ -7940,7 +7925,7 @@ body: |
; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1)
; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; SI-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
- ; SI-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32)
+ ; SI-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY [[UV8]](s32)
; SI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[COPY27]], [[C]](s32)
; SI-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C1]](s64)
; SI-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
@@ -7960,27 +7945,26 @@ body: |
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1)
; VI-LABEL: name: test_store_global_v9s32_align1
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
@@ -8053,7 +8037,6 @@ body: |
; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1)
; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
- ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>)
; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32)
; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
@@ -8119,7 +8102,7 @@ body: |
; VI-NEXT: G_STORE [[ANYEXT15]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1)
; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; VI-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
- ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32)
+ ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32)
; VI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32)
; VI-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C1]](s64)
; VI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
@@ -8139,17 +8122,18 @@ body: |
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
%2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
@@ -8169,11 +8153,9 @@ body: |
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
@@ -8204,7 +8186,6 @@ body: |
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>)
; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32)
; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
@@ -8230,7 +8211,7 @@ body: |
; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1)
; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
- ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32)
+ ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32)
; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32)
; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
; SI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1)
@@ -8240,27 +8221,26 @@ body: |
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1)
; VI-LABEL: name: test_store_global_v9s32_align2
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32)
@@ -8291,7 +8271,6 @@ body: |
; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1)
; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>)
; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32)
; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32)
; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64)
@@ -8317,7 +8296,7 @@ body: |
; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1)
; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
- ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32)
+ ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32)
; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32)
; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64)
; VI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1)
@@ -8327,17 +8306,18 @@ body: |
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
%2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
@@ -8357,65 +8337,69 @@ body: |
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
; CI-LABEL: name: test_store_global_v9s32_align4
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
; VI-LABEL: name: test_store_global_v9s32_align4
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
+ ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
; GFX9-LABEL: name: test_store_global_v9s32_align4
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
%2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
@@ -8435,65 +8419,69 @@ body: |
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1)
; CI-LABEL: name: test_store_global_v9s32_align8
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1)
; VI-LABEL: name: test_store_global_v9s32_align8
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1)
+ ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1)
; GFX9-LABEL: name: test_store_global_v9s32_align8
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
%2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
@@ -8513,65 +8501,69 @@ body: |
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1)
; CI-LABEL: name: test_store_global_v9s32_align16
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+ ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1)
+ ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1)
; VI-LABEL: name: test_store_global_v9s32_align16
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+ ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; VI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1)
+ ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1)
; GFX9-LABEL: name: test_store_global_v9s32_align16
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128
- ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64
- ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+ ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1)
+ ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
%2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
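
The v9s32 hunks that end here apply the same rewrite with three input vectors: the old checks built a <9 x s32> G_CONCAT_VECTORS and carved it up with G_EXTRACT, while the new ones unmerge each <3 x s32> source and regroup the nine scalars as 4 + 4 + 1 directly. Distilled into a standalone MIR sketch (illustrative register names; memory operands follow the align-16 variant above):

  %ptr:_(p1) = COPY $vgpr0_vgpr1
  %a:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
  %b:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7
  %c:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9
  %a0:_(s32), %a1:_(s32), %a2:_(s32) = G_UNMERGE_VALUES %a(<3 x s32>)
  %b0:_(s32), %b1:_(s32), %b2:_(s32) = G_UNMERGE_VALUES %b(<3 x s32>)
  %c0:_(s32), %c1:_(s32), %c2:_(s32) = G_UNMERGE_VALUES %c(<3 x s32>)
  ; Regroup 3+3+3 scalars into 4+4+1 with no <9 x s32> value in between.
  %lo:_(<4 x s32>) = G_BUILD_VECTOR %a0(s32), %a1(s32), %a2(s32), %b0(s32)
  %mid:_(<4 x s32>) = G_BUILD_VECTOR %b1(s32), %b2(s32), %c0(s32), %c1(s32)
  G_STORE %lo(<4 x s32>), %ptr(p1) :: (store (<4 x s32>), addrspace 1)
  %c16:_(s64) = G_CONSTANT i64 16
  %p16:_(p1) = G_PTR_ADD %ptr, %c16(s64)
  G_STORE %mid(<4 x s32>), %p16(p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1)
  %c32:_(s64) = G_CONSTANT i64 32
  %p32:_(p1) = G_PTR_ADD %ptr, %c32(s64)
  ; The ninth element is the leftover and goes out as a single s32.
  G_STORE %c2(s32), %p32(p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1)
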
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
index 778d6932f8b09..2b562df4f134a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
@@ -152,12 +152,12 @@ body: |
; SI-LABEL: name: test_store_global_v3s32
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
- ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1)
; VI-LABEL: name: test_store_global_v3s32
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
@@ -473,12 +473,12 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
; SI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY]](s96)
- ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0
- ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64
- ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY1]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+ ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY1]](p1) :: (store (<2 x s32>), align 16, addrspace 1)
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64)
- ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
+ ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1)
; VI-LABEL: name: test_store_global_96
; VI: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
; VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4
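
The legalize-store.mir updates just above are the narrowest instance of the pattern: a <3 x s32> (or an s96 bitcast to one) now splits into a <2 x s32> store plus a trailing s32 store through one unmerge and one G_BUILD_VECTOR, with no G_EXTRACT at bit offsets 0 and 64. As a sketch, under the same illustrative-naming caveat:

  %ptr:_(p1) = COPY $vgpr0_vgpr1
  %vec:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
  %e0:_(s32), %e1:_(s32), %e2:_(s32) = G_UNMERGE_VALUES %vec(<3 x s32>)
  %lo:_(<2 x s32>) = G_BUILD_VECTOR %e0(s32), %e1(s32)
  G_STORE %lo(<2 x s32>), %ptr(p1) :: (store (<2 x s32>), align 4, addrspace 1)
  %c8:_(s64) = G_CONSTANT i64 8
  %tail:_(p1) = G_PTR_ADD %ptr, %c8(s64)
  ; The third element is the leftover store at offset 8.
  G_STORE %e2(s32), %tail(p1) :: (store (s32) into unknown-address + 8, addrspace 1)
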
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir
index 411bbc68b47c7..9c210beb285a1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir
@@ -329,11 +329,9 @@ body: |
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32)
; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32)
; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
@@ -342,7 +340,7 @@ body: |
; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]]
; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32)
; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
- ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32)
+ ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32)
; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C1]]
; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]]
; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]]
@@ -354,27 +352,27 @@ body: |
; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[UMIN2]]
; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[ADD2]], [[C]](s32)
; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]]
; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]]
; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]]
- ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]]
- ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]]
+ ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]]
; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]]
- ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]]
- ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]]
+ ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]]
; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]]
- ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX8-LABEL: name: uaddsat_v3s16
; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -386,39 +384,37 @@ body: |
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC3]]
; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC4]]
; GFX8-NEXT: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC2]], [[TRUNC5]]
; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16)
; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16)
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT2]](s16)
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+ ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
- ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+ ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
- ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX9-LABEL: name: uaddsat_v3s16
; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -427,29 +423,27 @@ body: |
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]]
; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]]
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT1]](<2 x s16>)
; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT1]](<2 x s16>)
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32)
+ ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST6]](s32)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
index d3d1c0b65d987..83976e15d8201 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
@@ -1,8 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
---
name: test_umax_s32
@@ -331,12 +331,12 @@ body: |
; SI-LABEL: name: test_umax_v3s16
; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -355,7 +355,6 @@ body: |
; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
; VI-LABEL: name: test_umax_v3s16
; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
@@ -364,6 +363,7 @@ body: |
; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
@@ -381,23 +381,27 @@ body: |
; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
; GFX9-LABEL: name: test_umax_v3s16
; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
- ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
- ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
- ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
- ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
- ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV8]], [[UV10]]
- ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV9]], [[UV11]]
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX1]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF2]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF2]](s32)
+ ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]]
+ ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]]
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX1]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32)
; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
index ec873d966ffe7..d08f57f961230 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
@@ -1,8 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
---
name: test_umin_s32
@@ -331,12 +331,12 @@ body: |
; SI-LABEL: name: test_umin_v3s16
; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
@@ -355,7 +355,6 @@ body: |
; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
; VI-LABEL: name: test_umin_v3s16
; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
@@ -364,6 +363,7 @@ body: |
; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
@@ -381,23 +381,27 @@ body: |
; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
; GFX9-LABEL: name: test_umin_v3s16
; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
- ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
- ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
- ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
- ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
- ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV8]], [[UV10]]
- ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV9]], [[UV11]]
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN1]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF2]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF2]](s32)
+ ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]]
+ ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]]
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN1]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32)
; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir
index 830c6459e4284..eac097ae0a265 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir
@@ -443,67 +443,51 @@ body: |
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32)
- ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32)
- ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
+ ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16)
; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
- ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]]
- ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]]
+ ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
+ ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]]
; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s16)
; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[AND3]](s16)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32)
; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]]
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[C1]](s32)
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[C2]](s32)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
- ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
- ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]]
- ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[AND4]](s16)
- ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[AND5]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT4]](s32), [[ANYEXT5]](s32)
+ ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]]
+ ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32)
+ ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]]
+ ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND4]], [[AND5]]
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C1]](s16)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>)
+ ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C3]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]]
+ ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]]
- ; GFX9-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[AND6]](s16)
- ; GFX9-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[AND7]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT6]](s32), [[ANYEXT7]](s32)
- ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC4]]
- ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL1]], [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>)
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR1]](<2 x s16>)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]]
- ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
- ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]]
- ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
- ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C3]](s16)
- ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL]]
- ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]]
- ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[DEF1]](s32)
- ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]]
- ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C3]](s16)
- ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL1]]
+ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16)
+ ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]]
+ ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[LSHR1]], [[C]]
+ ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+ ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]]
+ ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16)
+ ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL1]]
; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
- ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+ ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32)
%0:_(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir
index 7c93db9ba2af3..84c4eec55f2da 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir
@@ -407,11 +407,9 @@ body: |
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32)
@@ -421,7 +419,7 @@ body: |
; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR3]]
; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]]
; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32)
- ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32)
; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32)
@@ -436,26 +434,26 @@ body: |
; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[SHL5]]
; GFX6-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32)
; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]]
; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]]
; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]]
- ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C1]]
- ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+ ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]]
- ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C1]]
- ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+ ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]]
- ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX8-LABEL: name: ushlsat_v3s16
; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -467,13 +465,11 @@ body: |
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16)
; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC3]](s16)
@@ -489,27 +485,27 @@ body: |
; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]]
; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]]
; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16)
; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16)
; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
- ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16)
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]]
+ ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]]
; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
- ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]]
- ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]]
+ ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]]
; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]]
- ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX9-LABEL: name: ushlsat_v3s16
; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -521,13 +517,11 @@ body: |
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16)
; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC3]](s16)
@@ -543,16 +537,16 @@ body: |
; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]]
; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]]
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16)
; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST4]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR6]](s32), [[BITCAST5]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR6]](s32), [[BITCAST4]](s32)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir
index 6221a15da5d51..fe8966843e35c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir
@@ -317,18 +317,16 @@ body: |
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32)
; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32)
; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]]
; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]]
; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32)
; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
- ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32)
+ ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32)
; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]]
; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]]
; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32)
@@ -338,27 +336,27 @@ body: |
; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[UMIN2]]
; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SUB2]], [[C]](s32)
; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]]
; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]]
- ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]]
- ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+ ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]]
- ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]]
- ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+ ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]]
- ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX8-LABEL: name: usubsat_v3s16
; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -370,39 +368,37 @@ body: |
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC3]]
; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC4]]
; GFX8-NEXT: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC2]], [[TRUNC5]]
; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16)
; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16)
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT2]](s16)
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+ ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
- ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+ ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
- ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; GFX9-LABEL: name: usubsat_v3s16
; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
@@ -411,29 +407,27 @@ body: |
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32)
; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]]
; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]]
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT1]](<2 x s16>)
; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT1]](<2 x s16>)
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32)
+ ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST6]](s32)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir
index 3d718081ba925..7e00c75a2a515 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
--- |
@@ -73,100 +73,88 @@ body: |
; GFX8-LABEL: name: add_v3i16
; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96)
- ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+ ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+ ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+ ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX8-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>)
- ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s96)
- ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
- ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+ ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32)
+ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
; GFX8-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC3]]
; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]]
; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]]
- ; GFX8-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16)
; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16)
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16)
- ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
- ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+ ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
; GFX8-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
; GFX8-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
; GFX9-LABEL: name: add_v3i16
; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>)
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s96)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV3]](s32), [[LSHR1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV4]](s32), [[DEF1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32)
; GFX9-NEXT: [[ADD:%[0-9]+]]:_(<2 x s16>) = G_ADD [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]]
; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(<2 x s16>) = G_ADD [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]]
- ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[ADD1]](<2 x s16>)
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[BITCAST3]](s32)
- ; GFX9-NEXT: $vgpr0 = COPY [[ADD]](<2 x s16>)
- ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[ADD]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[ADD1]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
+ ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
; GFX9-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
; GFX9-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
%3:_(<2 x s16>) = COPY $vgpr0
%4:_(<2 x s16>) = COPY $vgpr1
- %5:_(<2 x s16>) = G_IMPLICIT_DEF
- %6:_(<6 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>), %5(<2 x s16>)
- %19:_(s96) = G_BITCAST %6(<6 x s16>)
- %20:_(s48) = G_TRUNC %19(s96)
- %0:_(<3 x s16>) = G_BITCAST %20(s48)
- %8:_(<2 x s16>) = COPY $vgpr2
- %9:_(<2 x s16>) = COPY $vgpr3
- %10:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %5(<2 x s16>)
- %21:_(s96) = G_BITCAST %10(<6 x s16>)
- %22:_(s48) = G_TRUNC %21(s96)
- %1:_(<3 x s16>) = G_BITCAST %22(s48)
+ %5:_(<4 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>)
+ %6:_(s16), %7:_(s16), %8:_(s16), %9:_(s16) = G_UNMERGE_VALUES %5(<4 x s16>)
+ %0:_(<3 x s16>) = G_BUILD_VECTOR %6(s16), %7(s16), %8(s16)
+ %10:_(<2 x s16>) = COPY $vgpr2
+ %11:_(<2 x s16>) = COPY $vgpr3
+ %12:_(<4 x s16>) = G_CONCAT_VECTORS %10(<2 x s16>), %11(<2 x s16>)
+ %13:_(s16), %14:_(s16), %15:_(s16), %16:_(s16) = G_UNMERGE_VALUES %12(<4 x s16>)
+ %1:_(<3 x s16>) = G_BUILD_VECTOR %13(s16), %14(s16), %15(s16)
%2:sgpr_64 = COPY $sgpr30_sgpr31
- %12:_(<3 x s16>) = G_ADD %0, %1
- %16:_(<3 x s16>) = G_IMPLICIT_DEF
- %17:_(<6 x s16>) = G_CONCAT_VECTORS %12(<3 x s16>), %16(<3 x s16>)
- %14:_(<2 x s16>), %15:_(<2 x s16>), %18:_(<2 x s16>) = G_UNMERGE_VALUES %17(<6 x s16>)
- $vgpr0 = COPY %14(<2 x s16>)
- $vgpr1 = COPY %15(<2 x s16>)
- %13:ccr_sgpr_64 = COPY %2
- S_SETPC_B64_return %13, implicit $vgpr0, implicit $vgpr1
+ %17:_(<3 x s16>) = G_ADD %0, %1
+ %21:_(s16), %22:_(s16), %23:_(s16) = G_UNMERGE_VALUES %17(<3 x s16>)
+ %24:_(s16) = G_IMPLICIT_DEF
+ %25:_(<4 x s16>) = G_BUILD_VECTOR %21(s16), %22(s16), %23(s16), %24(s16)
+ %19:_(<2 x s16>), %20:_(<2 x s16>) = G_UNMERGE_VALUES %25(<4 x s16>)
+ $vgpr0 = COPY %19(<2 x s16>)
+ $vgpr1 = COPY %20(<2 x s16>)
+ %18:ccr_sgpr_64 = COPY %2
+ S_SETPC_B64_return %18, implicit $vgpr0, implicit $vgpr1
...
---
@@ -178,116 +166,88 @@ body: |
; GFX8-LABEL: name: shl_v3i16
; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96)
- ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+ ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+ ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+ ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX8-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>)
- ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s96)
- ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
- ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
+ ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32)
+ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
; GFX8-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16)
; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16)
; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16)
- ; GFX8-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL]](s16)
; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16)
; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
- ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16)
- ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
- ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
- ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
+ ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
; GFX8-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
; GFX8-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
; GFX9-LABEL: name: shl_v3i16
; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32)
- ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
- ; GFX9-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s96)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV5]](s32), [[LSHR1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV6]](s32), [[DEF1]](s32)
- ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
; GFX9-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV3]](<3 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16)
- ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV8]](<3 x s16>), 0
- ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
- ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16)
- ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[EXTRACT]], [[EXTRACT1]](<2 x s16>)
- ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[COPY5]], [[COPY6]](s16)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR2]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32)
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST3]](s32)
- ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
- ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
- ; GFX9-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
- ; GFX9-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+ ; GFX9-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
%3:_(<2 x s16>) = COPY $vgpr0
%4:_(<2 x s16>) = COPY $vgpr1
- %5:_(<2 x s16>) = G_IMPLICIT_DEF
- %6:_(<6 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>), %5(<2 x s16>)
- %19:_(s96) = G_BITCAST %6(<6 x s16>)
- %20:_(s48) = G_TRUNC %19(s96)
- %0:_(<3 x s16>) = G_BITCAST %20(s48)
- %8:_(<2 x s16>) = COPY $vgpr2
- %9:_(<2 x s16>) = COPY $vgpr3
- %10:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %5(<2 x s16>)
- %21:_(s96) = G_BITCAST %10(<6 x s16>)
- %22:_(s48) = G_TRUNC %21(s96)
- %1:_(<3 x s16>) = G_BITCAST %22(s48)
+ %5:_(<4 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>)
+ %6:_(s16), %7:_(s16), %8:_(s16), %9:_(s16) = G_UNMERGE_VALUES %5(<4 x s16>)
+ %0:_(<3 x s16>) = G_BUILD_VECTOR %6(s16), %7(s16), %8(s16)
+ %10:_(<2 x s16>) = COPY $vgpr2
+ %11:_(<2 x s16>) = COPY $vgpr3
+ %12:_(<4 x s16>) = G_CONCAT_VECTORS %10(<2 x s16>), %11(<2 x s16>)
+ %13:_(s16), %14:_(s16), %15:_(s16), %16:_(s16) = G_UNMERGE_VALUES %12(<4 x s16>)
+ %1:_(<3 x s16>) = G_BUILD_VECTOR %13(s16), %14(s16), %15(s16)
%2:sgpr_64 = COPY $sgpr30_sgpr31
- %12:_(<3 x s16>) = G_SHL %0, %1(<3 x s16>)
- %16:_(<3 x s16>) = G_IMPLICIT_DEF
- %17:_(<6 x s16>) = G_CONCAT_VECTORS %12(<3 x s16>), %16(<3 x s16>)
- %14:_(<2 x s16>), %15:_(<2 x s16>), %18:_(<2 x s16>) = G_UNMERGE_VALUES %17(<6 x s16>)
- $vgpr0 = COPY %14(<2 x s16>)
- $vgpr1 = COPY %15(<2 x s16>)
- %13:ccr_sgpr_64 = COPY %2
- S_SETPC_B64_return %13, implicit $vgpr0, implicit $vgpr1
+ %17:_(<3 x s16>) = G_SHL %0, %1(<3 x s16>)
+ %21:_(s16), %22:_(s16), %23:_(s16) = G_UNMERGE_VALUES %17(<3 x s16>)
+ %24:_(s16) = G_IMPLICIT_DEF
+ %25:_(<4 x s16>) = G_BUILD_VECTOR %21(s16), %22(s16), %23(s16), %24(s16)
+ %19:_(<2 x s16>), %20:_(<2 x s16>) = G_UNMERGE_VALUES %25(<4 x s16>)
+ $vgpr0 = COPY %19(<2 x s16>)
+ $vgpr1 = COPY %20(<2 x s16>)
+ %18:ccr_sgpr_64 = COPY %2
+ S_SETPC_B64_return %18, implicit $vgpr0, implicit $vgpr1
...
---
@@ -389,31 +349,30 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[CONCAT_VECTORS]](<10 x s16>)
- ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s160)
- ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+ ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+ ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
- ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+ ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+ ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX8-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s160) = G_BITCAST [[CONCAT_VECTORS1]](<10 x s16>)
- ; GFX8-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s160)
- ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32)
- ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
+ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
+ ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
- ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32)
- ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32)
+ ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
+ ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; GFX8-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
- ; GFX8-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32)
+ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>)
+ ; GFX8-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
; GFX8-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]]
; GFX8-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]]
; GFX8-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
@@ -429,103 +388,90 @@ body: |
; GFX8-NEXT: [[FCANONICALIZE8:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]]
; GFX8-NEXT: [[FCANONICALIZE9:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC9]]
; GFX8-NEXT: [[FMAXNUM_IEEE4:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE8]], [[FCANONICALIZE9]]
- ; GFX8-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
- ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16)
; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16)
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16)
; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE3]](s16)
; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE4]](s16)
- ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL2]]
- ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
- ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
- ; GFX8-NEXT: $vgpr2 = COPY [[BITCAST5]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+ ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+ ; GFX8-NEXT: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
; GFX9-LABEL: name: maxnum_v5i16
; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[CONCAT_VECTORS]](<10 x s16>)
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s160)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[LSHR1]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV2]](s32), [[DEF1]](s32)
- ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-NEXT: [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s160) = G_BITCAST [[CONCAT_VECTORS2]](<10 x s16>)
- ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s160)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32)
- ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV7]](s32), [[LSHR2]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV8]](s32), [[LSHR3]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV9]](s32), [[DEF1]](s32)
- ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(<5 x s16>), [[UV13:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<10 x s16>)
- ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV5]](<5 x s16>), 0
- ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV12]](<5 x s16>), 0
- ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>)
- ; GFX9-NEXT: [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>)
- ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV14]]
- ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV17]]
+ ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR1]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR2]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC]]
+ ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC3]]
; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
- ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV15]]
- ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV18]]
+ ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC1]]
+ ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC4]]
; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
- ; GFX9-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV16]]
- ; GFX9-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV19]]
+ ; GFX9-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC2]]
+ ; GFX9-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC5]]
; GFX9-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]]
- ; GFX9-NEXT: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE2]](<2 x s16>)
- ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV20]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[BITCAST3]](s32)
- ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>)
- ; GFX9-NEXT: $vgpr1 = COPY [[FMAXNUM_IEEE1]](<2 x s16>)
- ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE2]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR4]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[LSHR5]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST8]](s32), [[DEF]](s32)
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>)
+ ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
+ ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC8]](<2 x s16>)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
%2:_(<2 x s16>) = COPY $vgpr0
%3:_(<2 x s16>) = COPY $vgpr1
%4:_(<2 x s16>) = COPY $vgpr2
- %5:_(<2 x s16>) = G_IMPLICIT_DEF
- %6:_(<10 x s16>) = G_CONCAT_VECTORS %2(<2 x s16>), %3(<2 x s16>), %4(<2 x s16>), %5(<2 x s16>), %5(<2 x s16>)
- %22:_(s160) = G_BITCAST %6(<10 x s16>)
- %23:_(s80) = G_TRUNC %22(s160)
- %0:_(<5 x s16>) = G_BITCAST %23(s80)
- %8:_(<2 x s16>) = COPY $vgpr3
- %9:_(<2 x s16>) = COPY $vgpr4
- %10:_(<2 x s16>) = COPY $vgpr5
- %11:_(<10 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>), %5(<2 x s16>), %5(<2 x s16>)
- %24:_(s160) = G_BITCAST %11(<10 x s16>)
- %25:_(s80) = G_TRUNC %24(s160)
- %1:_(<5 x s16>) = G_BITCAST %25(s80)
- %14:_(<5 x s16>) = G_FMAXNUM %0, %1
- %18:_(<5 x s16>) = G_IMPLICIT_DEF
- %19:_(<10 x s16>) = G_CONCAT_VECTORS %14(<5 x s16>), %18(<5 x s16>)
- %15:_(<2 x s16>), %16:_(<2 x s16>), %17:_(<2 x s16>), %20:_(<2 x s16>), %21:_(<2 x s16>) = G_UNMERGE_VALUES %19(<10 x s16>)
- $vgpr0 = COPY %15(<2 x s16>)
- $vgpr1 = COPY %16(<2 x s16>)
- $vgpr2 = COPY %17(<2 x s16>)
+ %5:_(<6 x s16>) = G_CONCAT_VECTORS %2(<2 x s16>), %3(<2 x s16>), %4(<2 x s16>)
+ %6:_(s16), %7:_(s16), %8:_(s16), %9:_(s16), %10:_(s16), %11:_(s16) = G_UNMERGE_VALUES %5(<6 x s16>)
+ %0:_(<5 x s16>) = G_BUILD_VECTOR %6(s16), %7(s16), %8(s16), %9(s16), %10(s16)
+ %12:_(<2 x s16>) = COPY $vgpr3
+ %13:_(<2 x s16>) = COPY $vgpr4
+ %14:_(<2 x s16>) = COPY $vgpr5
+ %15:_(<6 x s16>) = G_CONCAT_VECTORS %12(<2 x s16>), %13(<2 x s16>), %14(<2 x s16>)
+ %16:_(s16), %17:_(s16), %18:_(s16), %19:_(s16), %20:_(s16), %21:_(s16) = G_UNMERGE_VALUES %15(<6 x s16>)
+ %1:_(<5 x s16>) = G_BUILD_VECTOR %16(s16), %17(s16), %18(s16), %19(s16), %20(s16)
+ %23:_(<5 x s16>) = G_FMAXNUM %0, %1
+ %27:_(s16), %28:_(s16), %29:_(s16), %30:_(s16), %31:_(s16) = G_UNMERGE_VALUES %23(<5 x s16>)
+ %32:_(s16) = G_IMPLICIT_DEF
+ %33:_(<6 x s16>) = G_BUILD_VECTOR %27(s16), %28(s16), %29(s16), %30(s16), %31(s16), %32(s16)
+ %24:_(<2 x s16>), %25:_(<2 x s16>), %26:_(<2 x s16>) = G_UNMERGE_VALUES %33(<6 x s16>)
+ $vgpr0 = COPY %24(<2 x s16>)
+ $vgpr1 = COPY %25(<2 x s16>)
+ $vgpr2 = COPY %26(<2 x s16>)
SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
index a0a2e3ee83032..e954104c2f70d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -allow-ginsert-as-artifact=0 -global-isel-abort=0 %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s
---
name: test_xor_s32
@@ -305,18 +305,14 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32)
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
- ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>), [[DEF1]](<2 x s32>)
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<3 x s32>), [[UV7:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV6]](<3 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV5]]
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[UV7]](s32), [[XOR1]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
%2:_(<3 x s32>) = G_XOR %0, %1
@@ -353,23 +349,20 @@ body: |
; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32)
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32)
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR4]]
- ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR2]], [[BUILD_VECTOR5]]
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>), [[XOR2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>)
- ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<5 x s32>), [[UV11:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s32>)
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[UV10]](<5 x s32>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
+ ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
+ ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[UV9]]
+ ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](<2 x s32>)
+ ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](<2 x s32>)
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[XOR2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>)
%0:_(<5 x s32>) = G_IMPLICIT_DEF
%1:_(<5 x s32>) = G_IMPLICIT_DEF
%2:_(<5 x s32>) = G_XOR %0, %1
@@ -424,40 +417,65 @@ body: |
; CHECK-LABEL: name: test_xor_v3s16
; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT]], [[INSERT1]]
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C1]]
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]]
+ ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C1]]
+ ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]]
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]]
+ ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
+ ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]]
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]]
+ ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
%2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0
@@ -492,54 +510,102 @@ body: |
; CHECK-LABEL: name: test_xor_v5s16
; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>)
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0
- ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>)
- ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>)
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0
- ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT2]], [[INSERT3]]
- ; CHECK-NEXT: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0
- ; CHECK-NEXT: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0
- ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT4]], [[INSERT5]]
- ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>)
- ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR1]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
- ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>)
- ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>), [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>)
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV20]](<5 x s16>), 0
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>)
+ ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
+ ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
+ ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]]
+ ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL5]]
+ ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>)
+ ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
+ ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]]
+ ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32)
+ ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL7]]
+ ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>)
+ ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]]
+ ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR1]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32)
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
+ ; CHECK-NEXT: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST18:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST18]], [[C]](s32)
+ ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]]
+ ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]]
+ ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32)
+ ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL8]]
+ ; CHECK-NEXT: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+ ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]]
+ ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]]
+ ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32)
+ ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]]
+ ; CHECK-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+ ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]]
+ ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]]
+ ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32)
+ ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL10]]
+ ; CHECK-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+ ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[BITCAST18]], [[C1]]
+ ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]]
+ ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32)
+ ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL11]]
+ ; CHECK-NEXT: [[BITCAST22:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST19]](<2 x s16>), [[BITCAST20]](<2 x s16>), [[BITCAST21]](<2 x s16>), [[BITCAST22]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>)
%0:_(<5 x s16>) = G_IMPLICIT_DEF
%1:_(<5 x s16>) = G_IMPLICIT_DEF
%2:_(<5 x s16>) = G_XOR %0, %1
@@ -554,34 +620,15 @@ body: |
bb.0:
; CHECK-LABEL: name: test_xor_v3s8
- ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF1]](<3 x s8>), 0
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>)
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
- ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]]
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[XOR]](s32)
- ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
- ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
- ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT2]], [[ANYEXT3]]
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[XOR1]](s32)
- ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
- ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8)
- ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT4]], [[ANYEXT5]]
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[XOR2]](s32)
- ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
- ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8)
- ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT6]], [[ANYEXT7]]
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[XOR3]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>)
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>)
- ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>)
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV4]]
+ ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV1]], [[UV5]]
+ ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV6]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[XOR]](s32), [[XOR1]](s32), [[XOR2]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s8>) = G_IMPLICIT_DEF
%1:_(<3 x s8>) = G_IMPLICIT_DEF
%2:_(<3 x s8>) = G_XOR %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
index 5cac993556522..b62013d28e817 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
@@ -142,10 +142,7 @@ body: |
; CHECK-LABEL: name: test_zext_v3s16_to_v3s32
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
@@ -707,12 +704,6 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32)
- ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C2]](s32)
- ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C2]](s32)
- ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV14]], [[C2]](s32)
; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]]
; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32)
@@ -737,28 +728,16 @@ body: |
; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C2]](s32)
; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL10]]
; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
- ; CHECK-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[UV10]], [[C3]]
- ; CHECK-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]]
- ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C2]](s32)
- ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL11]]
- ; CHECK-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[UV11]], [[C3]]
- ; CHECK-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV12]], [[C3]]
- ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C2]](s32)
- ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL12]]
+ ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
+ ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL11]]
+ ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL9]]
; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s32), [[OR13]](s32)
- ; CHECK-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]]
- ; CHECK-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[UV13]], [[C3]]
- ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C2]](s32)
- ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL13]]
- ; CHECK-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[UV14]], [[C3]]
- ; CHECK-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]]
- ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND28]], [[C2]](s32)
- ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND27]], [[SHL14]]
- ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR15]](s32)
- ; CHECK-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[UV15]], [[C3]]
- ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND29]], [[SHL3]]
- ; CHECK-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]]
- ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR11]](s32)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR12]](s32)
+ ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL3]]
+ ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]]
+ ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR15]](s32)
; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV9]](s384)
; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s112)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
index 47c4ac2aa0428..88b3244df5fab 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
@@ -509,13 +509,10 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9
; GFX8-UNPACKED-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm d16
; GFX8-UNPACKED-NEXT: s_mov_b32 s0, 0xffff
-; GFX8-UNPACKED-NEXT: s_and_b32 s1, s0, s0
-; GFX8-UNPACKED-NEXT: s_lshl_b32 s1, s1, 16
-; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v3, s1
; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0)
-; GFX8-UNPACKED-NEXT: v_and_b32_e32 v4, s0, v1
-; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v4
+; GFX8-UNPACKED-NEXT: v_and_b32_e32 v3, s0, v1
+; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, s0, v2
+; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v3
; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-UNPACKED-NEXT: ; return to shader part epilog
;
@@ -530,12 +527,11 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX8-PACKED-NEXT: s_mov_b32 s6, s8
; GFX8-PACKED-NEXT: s_mov_b32 s7, s9
; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16
-; GFX8-PACKED-NEXT: s_mov_b32 s0, 0xffff
-; GFX8-PACKED-NEXT: s_and_b32 s0, s0, s0
-; GFX8-PACKED-NEXT: s_lshl_b32 s0, s0, 16
-; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, s0
; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
-; GFX8-PACKED-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-PACKED-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX8-PACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX8-PACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX8-PACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-PACKED-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: load_1d_v3f16_xyz:
@@ -552,7 +548,10 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT: s_lshl_b32 s0, s0, 16
; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX9-NEXT: v_and_or_b32 v1, v1, v2, s0
+; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v3
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: load_1d_v3f16_xyz:
@@ -565,11 +564,15 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: v_mov_b32_e32 v3, 0xffff
; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_lshl_b32 s0, s0, 16
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, s0
+; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX10-NEXT: v_and_or_b32 v1, v1, v3, s0
+; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX10-NEXT: v_and_or_b32 v0, v0, v3, v2
; GFX10-NEXT: ; return to shader part epilog
%v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
ret <3 x half> %v
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
index 36ae4e61ab3c8..b93c0102f171f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
@@ -9,46 +9,49 @@
define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
; GFX6-LABEL: name: s_buffer_load_i32
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret i32 %val
}
@@ -56,46 +59,49 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse
define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
; GFX6-LABEL: name: s_buffer_load_i32_glc
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_glc
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_glc
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 1)
ret i32 %val
}
@@ -103,61 +109,64 @@ define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %so
define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
; GFX6-LABEL: name: s_buffer_load_v2i32
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4)
- ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
- ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
- ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
- ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4)
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
; GFX7-LABEL: name: s_buffer_load_v2i32
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4)
- ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
- ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
- ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
- ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4)
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
; GFX8-LABEL: name: s_buffer_load_v2i32
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4)
- ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
- ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
- ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
- ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
- ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4)
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
%val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <2 x i32> %val
}
@@ -165,85 +174,79 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg
define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
; GFX6-LABEL: name: s_buffer_load_v3i32
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4)
- ; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
- ; GFX6: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]]
- ; GFX6: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2
- ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0
- ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1
- ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2
- ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
- ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
- ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
- ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
- ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4)
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
; GFX7-LABEL: name: s_buffer_load_v3i32
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4)
- ; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
- ; GFX7: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]]
- ; GFX7: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2
- ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0
- ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1
- ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
- ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
- ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
- ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
- ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4)
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
; GFX8-LABEL: name: s_buffer_load_v3i32
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4)
- ; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
- ; GFX8: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]]
- ; GFX8: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2
- ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0
- ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1
- ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
- ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
- ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
- ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
- ; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4)
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
%val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <3 x i32> %val
}
@@ -251,133 +254,136 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg
define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
; GFX6-LABEL: name: s_buffer_load_v8i32
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4)
- ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
- ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
- ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
- ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
- ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
- ; GFX6: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
- ; GFX6: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
- ; GFX6: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
- ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
- ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
- ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
- ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
- ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
- ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
- ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
- ; GFX6: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
- ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
- ; GFX6: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
- ; GFX6: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
- ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
- ; GFX6: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
- ; GFX6: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
- ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
- ; GFX6: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
- ; GFX6: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
- ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
- ; GFX6: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
- ; GFX6: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4)
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
+ ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
+ ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
+ ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
+ ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
+ ; GFX6-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
+ ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
+ ; GFX6-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
+ ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
+ ; GFX6-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
+ ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
+ ; GFX6-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
+ ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
+ ; GFX6-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
; GFX7-LABEL: name: s_buffer_load_v8i32
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4)
- ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
- ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
- ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
- ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
- ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
- ; GFX7: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
- ; GFX7: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
- ; GFX7: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
- ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
- ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
- ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
- ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
- ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
- ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
- ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
- ; GFX7: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
- ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
- ; GFX7: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
- ; GFX7: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
- ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
- ; GFX7: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
- ; GFX7: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
- ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
- ; GFX7: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
- ; GFX7: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
- ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
- ; GFX7: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
- ; GFX7: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4)
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
+ ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
+ ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
+ ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
+ ; GFX7-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
+ ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
+ ; GFX7-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
+ ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
+ ; GFX7-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
+ ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
+ ; GFX7-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
+ ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
+ ; GFX7-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
; GFX8-LABEL: name: s_buffer_load_v8i32
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4)
- ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
- ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
- ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
- ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
- ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
- ; GFX8: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
- ; GFX8: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
- ; GFX8: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
- ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
- ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
- ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
- ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
- ; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
- ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
- ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
- ; GFX8: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
- ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
- ; GFX8: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
- ; GFX8: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
- ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
- ; GFX8: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
- ; GFX8: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
- ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
- ; GFX8: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
- ; GFX8: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
- ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
- ; GFX8: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
- ; GFX8: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4)
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7
+ ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
+ ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
+ ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
+ ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
+ ; GFX8-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
+ ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
+ ; GFX8-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
+ ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
+ ; GFX8-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
+ ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
+ ; GFX8-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
+ ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec
+ ; GFX8-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
%val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x i32> %val
}
@@ -385,229 +391,232 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg
define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) {
; GFX6-LABEL: name: s_buffer_load_v16i32
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4)
- ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
- ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
- ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
- ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
- ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
- ; GFX6: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
- ; GFX6: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
- ; GFX6: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
- ; GFX6: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
- ; GFX6: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
- ; GFX6: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
- ; GFX6: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
- ; GFX6: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
- ; GFX6: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
- ; GFX6: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
- ; GFX6: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
- ; GFX6: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
- ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
- ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX6: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
- ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
- ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
- ; GFX6: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
- ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
- ; GFX6: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
- ; GFX6: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
- ; GFX6: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
- ; GFX6: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
- ; GFX6: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
- ; GFX6: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
- ; GFX6: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
- ; GFX6: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
- ; GFX6: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
- ; GFX6: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
- ; GFX6: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
- ; GFX6: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
- ; GFX6: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
- ; GFX6: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
- ; GFX6: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
- ; GFX6: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
- ; GFX6: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
- ; GFX6: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
- ; GFX6: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
- ; GFX6: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
- ; GFX6: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
- ; GFX6: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
- ; GFX6: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
- ; GFX6: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
- ; GFX6: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
- ; GFX6: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
- ; GFX6: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
- ; GFX6: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
- ; GFX6: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
- ; GFX6: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
- ; GFX6: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
- ; GFX6: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
- ; GFX6: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
- ; GFX6: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
- ; GFX6: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
- ; GFX6: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec
- ; GFX6: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4)
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
+ ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
+ ; GFX6-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
+ ; GFX6-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
+ ; GFX6-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
+ ; GFX6-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
+ ; GFX6-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
+ ; GFX6-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
+ ; GFX6-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
+ ; GFX6-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
+ ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX6-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
+ ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
+ ; GFX6-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
+ ; GFX6-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
+ ; GFX6-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
+ ; GFX6-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
+ ; GFX6-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
+ ; GFX6-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
+ ; GFX6-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
+ ; GFX6-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
+ ; GFX6-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
+ ; GFX6-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
+ ; GFX6-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
+ ; GFX6-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
+ ; GFX6-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
+ ; GFX6-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
+ ; GFX6-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
+ ; GFX6-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
+ ; GFX6-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
+ ; GFX6-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
+ ; GFX6-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
+ ; GFX6-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
+ ; GFX6-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
+ ; GFX6-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
+ ; GFX6-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
+ ; GFX6-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
+ ; GFX6-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec
+ ; GFX6-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
; GFX7-LABEL: name: s_buffer_load_v16i32
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4)
- ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
- ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
- ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
- ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
- ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
- ; GFX7: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
- ; GFX7: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
- ; GFX7: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
- ; GFX7: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
- ; GFX7: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
- ; GFX7: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
- ; GFX7: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
- ; GFX7: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
- ; GFX7: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
- ; GFX7: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
- ; GFX7: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
- ; GFX7: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
- ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
- ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX7: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
- ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
- ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
- ; GFX7: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
- ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
- ; GFX7: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
- ; GFX7: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
- ; GFX7: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
- ; GFX7: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
- ; GFX7: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
- ; GFX7: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
- ; GFX7: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
- ; GFX7: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
- ; GFX7: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
- ; GFX7: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
- ; GFX7: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
- ; GFX7: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
- ; GFX7: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
- ; GFX7: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
- ; GFX7: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
- ; GFX7: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
- ; GFX7: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
- ; GFX7: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
- ; GFX7: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
- ; GFX7: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
- ; GFX7: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
- ; GFX7: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
- ; GFX7: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
- ; GFX7: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
- ; GFX7: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
- ; GFX7: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
- ; GFX7: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
- ; GFX7: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
- ; GFX7: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
- ; GFX7: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
- ; GFX7: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
- ; GFX7: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
- ; GFX7: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
- ; GFX7: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
- ; GFX7: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
- ; GFX7: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec
- ; GFX7: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4)
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
+ ; GFX7-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
+ ; GFX7-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
+ ; GFX7-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
+ ; GFX7-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
+ ; GFX7-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
+ ; GFX7-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
+ ; GFX7-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
+ ; GFX7-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
+ ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX7-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
+ ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
+ ; GFX7-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
+ ; GFX7-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
+ ; GFX7-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
+ ; GFX7-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
+ ; GFX7-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
+ ; GFX7-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
+ ; GFX7-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
+ ; GFX7-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
+ ; GFX7-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
+ ; GFX7-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
+ ; GFX7-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
+ ; GFX7-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
+ ; GFX7-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
+ ; GFX7-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
+ ; GFX7-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
+ ; GFX7-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
+ ; GFX7-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
+ ; GFX7-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
+ ; GFX7-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
+ ; GFX7-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
+ ; GFX7-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
+ ; GFX7-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
+ ; GFX7-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
+ ; GFX7-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
+ ; GFX7-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec
+ ; GFX7-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
; GFX8-LABEL: name: s_buffer_load_v16i32
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4)
- ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
- ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
- ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
- ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
- ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
- ; GFX8: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
- ; GFX8: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
- ; GFX8: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
- ; GFX8: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
- ; GFX8: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
- ; GFX8: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
- ; GFX8: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
- ; GFX8: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
- ; GFX8: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
- ; GFX8: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
- ; GFX8: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
- ; GFX8: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
- ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
- ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX8: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
- ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
- ; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
- ; GFX8: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
- ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
- ; GFX8: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
- ; GFX8: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
- ; GFX8: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
- ; GFX8: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
- ; GFX8: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
- ; GFX8: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
- ; GFX8: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
- ; GFX8: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
- ; GFX8: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
- ; GFX8: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
- ; GFX8: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
- ; GFX8: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
- ; GFX8: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
- ; GFX8: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
- ; GFX8: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
- ; GFX8: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
- ; GFX8: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
- ; GFX8: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
- ; GFX8: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
- ; GFX8: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
- ; GFX8: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
- ; GFX8: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
- ; GFX8: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
- ; GFX8: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
- ; GFX8: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
- ; GFX8: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
- ; GFX8: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
- ; GFX8: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
- ; GFX8: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
- ; GFX8: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
- ; GFX8: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
- ; GFX8: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
- ; GFX8: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
- ; GFX8: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
- ; GFX8: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
- ; GFX8: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec
- ; GFX8: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4)
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7
+ ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8
+ ; GFX8-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9
+ ; GFX8-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10
+ ; GFX8-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11
+ ; GFX8-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12
+ ; GFX8-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13
+ ; GFX8-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14
+ ; GFX8-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15
+ ; GFX8-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec
+ ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX8-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec
+ ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
+ ; GFX8-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec
+ ; GFX8-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]]
+ ; GFX8-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec
+ ; GFX8-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]]
+ ; GFX8-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec
+ ; GFX8-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]]
+ ; GFX8-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec
+ ; GFX8-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]]
+ ; GFX8-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec
+ ; GFX8-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]]
+ ; GFX8-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec
+ ; GFX8-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]]
+ ; GFX8-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec
+ ; GFX8-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]]
+ ; GFX8-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec
+ ; GFX8-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]]
+ ; GFX8-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec
+ ; GFX8-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]]
+ ; GFX8-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec
+ ; GFX8-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]]
+ ; GFX8-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec
+ ; GFX8-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]]
+ ; GFX8-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec
+ ; GFX8-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]]
+ ; GFX8-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec
+ ; GFX8-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
%val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <16 x i32> %val
}
@@ -615,45 +624,48 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr
define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) {
; GFX6-LABEL: name: s_buffer_load_i32_offset_1
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_offset_1
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_offset_1
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1, i32 0)
ret i32 %val
}
@@ -661,43 +673,46 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) {
define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) {
; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_4
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_4
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_4
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 4, i32 1)
ret i32 %val
}
@@ -705,45 +720,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) {
define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) {
; GFX6-LABEL: name: s_buffer_load_i32_offset_255
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_offset_255
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
- ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_offset_255
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 255, i32 0)
ret i32 %val
}
@@ -751,43 +769,46 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) {
define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) {
; GFX6-LABEL: name: s_buffer_load_i32_offset_256
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_offset_256
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_offset_256
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 256, i32 0)
ret i32 %val
}
@@ -795,43 +816,46 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) {
define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) {
; GFX6-LABEL: name: s_buffer_load_i32_offset_1020
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_offset_1020
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_offset_1020
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1020, i32 0)
ret i32 %val
}
@@ -839,45 +863,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) {
define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) {
; GFX6-LABEL: name: s_buffer_load_i32_offset_1023
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_offset_1023
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023
- ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_offset_1023
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1023, i32 0)
ret i32 %val
}
@@ -885,44 +912,47 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) {
define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) {
; GFX6-LABEL: name: s_buffer_load_i32_offset_1024
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_offset_1024
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_offset_1024
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1024, i32 0)
ret i32 %val
}
@@ -930,45 +960,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) {
define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) {
; GFX6-LABEL: name: s_buffer_load_i32_offset_1025
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_offset_1025
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025
- ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_offset_1025
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1025, i32 0)
ret i32 %val
}
@@ -976,46 +1009,49 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) {
define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) {
; GFX6-LABEL: name: s_buffer_load_i32_offset_neg1
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_offset_neg1
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
- ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_offset_neg1
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0)
ret i32 %load
}
@@ -1023,45 +1059,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) {
define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) {
; GFX6-LABEL: name: s_buffer_load_i32_offset_neg4
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_offset_neg4
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_offset_neg4
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0)
ret i32 %load
}
@@ -1069,45 +1108,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) {
define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) {
; GFX6-LABEL: name: s_buffer_load_i32_offset_neg8
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_offset_neg8
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_offset_neg8
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0)
ret i32 %load
}
@@ -1115,45 +1157,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) {
define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) {
; GFX6-LABEL: name: s_buffer_load_i32_offset_bit31
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_offset_bit31
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_offset_bit31
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0)
ret i32 %load
}
@@ -1161,45 +1206,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) {
define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) {
; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_bit30
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_bit30
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_bit30
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 1)
ret i32 %load
}
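
A side note on the glc_bit30 checks just above: the intrinsic's third operand is the cache policy, and the `1` it passes shows up as the trailing cpol operand on the selected load (compare the `, 0 ::` in the surrounding tests, versus `, 1 ::` here). A minimal sketch of that mapping, assuming only the glc bit is exercised by these tests:

# Hypothetical helper, not LLVM API: models how the i32 cachepolicy
# argument of llvm.amdgcn.s.buffer.load surfaces as the cpol operand
# in the checks above (0 in the plain tests, 1 in the glc test).
GLC = 1  # bit 0 of the cachepolicy operand

def cpol_operand(cachepolicy: int) -> int:
    # Assumption: only glc is used here; other bits (dlc/slc on
    # newer targets) are not modeled.
    return cachepolicy & GLC

assert cpol_operand(0) == 0  # e.g. s_buffer_load_i32_offset_bit31
assert cpol_operand(1) == 1  # s_buffer_load_i32_offset_glc_bit30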
@@ -1207,45 +1255,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc)
define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) {
; GFX6-LABEL: name: s_buffer_load_i32_offset_bit29
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_offset_bit29
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_offset_bit29
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0)
ret i32 %load
}
@@ -1253,45 +1304,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) {
define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) {
; GFX6-LABEL: name: s_buffer_load_i32_offset_bit21
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_offset_bit21
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_offset_bit21
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0)
ret i32 %load
}
@@ -1299,45 +1353,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) {
define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) {
; GFX6-LABEL: name: s_buffer_load_i32_offset_bit20
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_offset_bit20
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_offset_bit20
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0)
ret i32 %load
}
@@ -1345,45 +1402,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) {
define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) {
; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit20
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit20
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit20
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0)
ret i32 %load
}
@@ -1391,44 +1451,47 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc)
define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) {
; GFX6-LABEL: name: s_buffer_load_i32_offset_bit19
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_offset_bit19
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_offset_bit19
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0)
ret i32 %load
}
@@ -1436,45 +1499,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) {
define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) {
; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit19
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
- ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit19
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0 :: (dereferenceable invariant load (s32))
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]]
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit19
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
- ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
- ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
+ ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0)
ret i32 %load
}
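
The block of immediate-offset tests ends here, and the three check prefixes differ only in how the byte offset is encoded: GFX6 always materializes it in an SGPR, GFX7 folds any dword-aligned offset into the 32-bit _IMM_ci literal as offset >> 2 (hence 524288 becoming 131072, and -524288 becoming 1073610752 once treated as unsigned), and GFX8 folds offsets that fit its 20-bit byte field (bit19 encodes, bit20 does not). A rough model of that selection, inferred from these checks rather than taken from the selector itself, so treat the exact boundary conditions as assumptions:

# Illustrative model only -- the real rules live in the AMDGPU
# instruction selector and may apply extra range/alignment checks.
def select_sbuffer_form(gen: str, byte_off: int) -> str:
    off = byte_off & 0xFFFFFFFF  # offsets are encoded as unsigned 32-bit
    if gen == "GFX6":
        # SI: only a small dword-scaled immediate; every test above
        # overflows it, so the offset goes through an SGPR.
        if off % 4 == 0 and off // 4 <= 0xFF:  # assumed SI limit
            return "S_BUFFER_LOAD_DWORD_IMM %d" % (off // 4)
    elif gen == "GFX7":
        # CI: 32-bit literal, still counted in dwords.
        if off % 4 == 0:
            return "S_BUFFER_LOAD_DWORD_IMM_ci %d" % (off // 4)
    elif gen == "GFX8":
        # VI: 20-bit byte offset; bit19 fits, bit20 and negatives don't.
        if off <= 0xFFFFF:
            return "S_BUFFER_LOAD_DWORD_IMM %d" % off
    return "S_MOV_B32 %d + S_BUFFER_LOAD_DWORD_SGPR" % byte_off

# Reproduces the immediates in the checks above:
assert select_sbuffer_form("GFX7", -524288).endswith("1073610752")
assert select_sbuffer_form("GFX8", 524288).endswith("524288")
assert "SGPR" in select_sbuffer_form("GFX8", 1048576)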
@@ -1483,43 +1549,46 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc)
define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret float %val
}
@@ -1527,52 +1596,55 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32
define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
; GFX6-LABEL: name: s_buffer_load_v2f32_vgpr_offset
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4)
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
- ; GFX6: $vgpr0 = COPY [[COPY5]]
- ; GFX6: $vgpr1 = COPY [[COPY6]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4)
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; GFX7-LABEL: name: s_buffer_load_v2f32_vgpr_offset
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4)
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
- ; GFX7: $vgpr0 = COPY [[COPY5]]
- ; GFX7: $vgpr1 = COPY [[COPY6]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4)
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
; GFX8-LABEL: name: s_buffer_load_v2f32_vgpr_offset
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4)
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
- ; GFX8: $vgpr0 = COPY [[COPY5]]
- ; GFX8: $vgpr1 = COPY [[COPY6]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4)
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <2 x float> %val
}
@@ -1580,73 +1652,64 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r
define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
; GFX6-LABEL: name: s_buffer_load_v3f32_vgpr_offset
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
- ; GFX6: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]]
- ; GFX6: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]]
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11
- ; GFX6: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0
- ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1
- ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2
- ; GFX6: $vgpr0 = COPY [[COPY8]]
- ; GFX6: $vgpr1 = COPY [[COPY9]]
- ; GFX6: $vgpr2 = COPY [[COPY10]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
; GFX7-LABEL: name: s_buffer_load_v3f32_vgpr_offset
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
- ; GFX7: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]]
- ; GFX7: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]]
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11
- ; GFX7: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2
- ; GFX7: $vgpr0 = COPY [[COPY8]]
- ; GFX7: $vgpr1 = COPY [[COPY9]]
- ; GFX7: $vgpr2 = COPY [[COPY10]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
; GFX8-LABEL: name: s_buffer_load_v3f32_vgpr_offset
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
- ; GFX8: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]]
- ; GFX8: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]]
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11
- ; GFX8: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2
- ; GFX8: $vgpr0 = COPY [[COPY8]]
- ; GFX8: $vgpr1 = COPY [[COPY9]]
- ; GFX8: $vgpr2 = COPY [[COPY10]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
%val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <3 x float> %val
}
@@ -1654,64 +1717,67 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r
define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
; GFX6-LABEL: name: s_buffer_load_v4f32_vgpr_offset
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
- ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
- ; GFX6: $vgpr0 = COPY [[COPY5]]
- ; GFX6: $vgpr1 = COPY [[COPY6]]
- ; GFX6: $vgpr2 = COPY [[COPY7]]
- ; GFX6: $vgpr3 = COPY [[COPY8]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
; GFX7-LABEL: name: s_buffer_load_v4f32_vgpr_offset
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
- ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
- ; GFX7: $vgpr0 = COPY [[COPY5]]
- ; GFX7: $vgpr1 = COPY [[COPY6]]
- ; GFX7: $vgpr2 = COPY [[COPY7]]
- ; GFX7: $vgpr3 = COPY [[COPY8]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
; GFX8-LABEL: name: s_buffer_load_v4f32_vgpr_offset
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
- ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
- ; GFX8: $vgpr0 = COPY [[COPY5]]
- ; GFX8: $vgpr1 = COPY [[COPY6]]
- ; GFX8: $vgpr2 = COPY [[COPY7]]
- ; GFX8: $vgpr3 = COPY [[COPY8]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
%val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <4 x float> %val
}
@@ -1719,94 +1785,97 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX6: $vgpr0 = COPY [[COPY5]]
- ; GFX6: $vgpr1 = COPY [[COPY6]]
- ; GFX6: $vgpr2 = COPY [[COPY7]]
- ; GFX6: $vgpr3 = COPY [[COPY8]]
- ; GFX6: $vgpr4 = COPY [[COPY9]]
- ; GFX6: $vgpr5 = COPY [[COPY10]]
- ; GFX6: $vgpr6 = COPY [[COPY11]]
- ; GFX6: $vgpr7 = COPY [[COPY12]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX7: $vgpr0 = COPY [[COPY5]]
- ; GFX7: $vgpr1 = COPY [[COPY6]]
- ; GFX7: $vgpr2 = COPY [[COPY7]]
- ; GFX7: $vgpr3 = COPY [[COPY8]]
- ; GFX7: $vgpr4 = COPY [[COPY9]]
- ; GFX7: $vgpr5 = COPY [[COPY10]]
- ; GFX7: $vgpr6 = COPY [[COPY11]]
- ; GFX7: $vgpr7 = COPY [[COPY12]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX8: $vgpr0 = COPY [[COPY5]]
- ; GFX8: $vgpr1 = COPY [[COPY6]]
- ; GFX8: $vgpr2 = COPY [[COPY7]]
- ; GFX8: $vgpr3 = COPY [[COPY8]]
- ; GFX8: $vgpr4 = COPY [[COPY9]]
- ; GFX8: $vgpr5 = COPY [[COPY10]]
- ; GFX8: $vgpr6 = COPY [[COPY11]]
- ; GFX8: $vgpr7 = COPY [[COPY12]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x float> %val
}
@@ -1814,148 +1883,151 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r
define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) {
; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
- ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
- ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
- ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
- ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
- ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
- ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
- ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
- ; GFX6: $vgpr0 = COPY [[COPY5]]
- ; GFX6: $vgpr1 = COPY [[COPY6]]
- ; GFX6: $vgpr2 = COPY [[COPY7]]
- ; GFX6: $vgpr3 = COPY [[COPY8]]
- ; GFX6: $vgpr4 = COPY [[COPY9]]
- ; GFX6: $vgpr5 = COPY [[COPY10]]
- ; GFX6: $vgpr6 = COPY [[COPY11]]
- ; GFX6: $vgpr7 = COPY [[COPY12]]
- ; GFX6: $vgpr8 = COPY [[COPY13]]
- ; GFX6: $vgpr9 = COPY [[COPY14]]
- ; GFX6: $vgpr10 = COPY [[COPY15]]
- ; GFX6: $vgpr11 = COPY [[COPY16]]
- ; GFX6: $vgpr12 = COPY [[COPY17]]
- ; GFX6: $vgpr13 = COPY [[COPY18]]
- ; GFX6: $vgpr14 = COPY [[COPY19]]
- ; GFX6: $vgpr15 = COPY [[COPY20]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]]
+ ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]]
+ ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]]
+ ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]]
+ ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]]
+ ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]]
+ ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]]
+ ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
- ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
- ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
- ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
- ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
- ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
- ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
- ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
- ; GFX7: $vgpr0 = COPY [[COPY5]]
- ; GFX7: $vgpr1 = COPY [[COPY6]]
- ; GFX7: $vgpr2 = COPY [[COPY7]]
- ; GFX7: $vgpr3 = COPY [[COPY8]]
- ; GFX7: $vgpr4 = COPY [[COPY9]]
- ; GFX7: $vgpr5 = COPY [[COPY10]]
- ; GFX7: $vgpr6 = COPY [[COPY11]]
- ; GFX7: $vgpr7 = COPY [[COPY12]]
- ; GFX7: $vgpr8 = COPY [[COPY13]]
- ; GFX7: $vgpr9 = COPY [[COPY14]]
- ; GFX7: $vgpr10 = COPY [[COPY15]]
- ; GFX7: $vgpr11 = COPY [[COPY16]]
- ; GFX7: $vgpr12 = COPY [[COPY17]]
- ; GFX7: $vgpr13 = COPY [[COPY18]]
- ; GFX7: $vgpr14 = COPY [[COPY19]]
- ; GFX7: $vgpr15 = COPY [[COPY20]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]]
+ ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]]
+ ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]]
+ ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]]
+ ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]]
+ ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]]
+ ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]]
+ ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
- ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
- ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
- ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
- ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
- ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
- ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
- ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
- ; GFX8: $vgpr0 = COPY [[COPY5]]
- ; GFX8: $vgpr1 = COPY [[COPY6]]
- ; GFX8: $vgpr2 = COPY [[COPY7]]
- ; GFX8: $vgpr3 = COPY [[COPY8]]
- ; GFX8: $vgpr4 = COPY [[COPY9]]
- ; GFX8: $vgpr5 = COPY [[COPY10]]
- ; GFX8: $vgpr6 = COPY [[COPY11]]
- ; GFX8: $vgpr7 = COPY [[COPY12]]
- ; GFX8: $vgpr8 = COPY [[COPY13]]
- ; GFX8: $vgpr9 = COPY [[COPY14]]
- ; GFX8: $vgpr10 = COPY [[COPY15]]
- ; GFX8: $vgpr11 = COPY [[COPY16]]
- ; GFX8: $vgpr12 = COPY [[COPY17]]
- ; GFX8: $vgpr13 = COPY [[COPY18]]
- ; GFX8: $vgpr14 = COPY [[COPY19]]
- ; GFX8: $vgpr15 = COPY [[COPY20]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]]
+ ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]]
+ ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]]
+ ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]]
+ ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]]
+ ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]]
+ ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]]
+ ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
%val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <16 x float> %val
}
@@ -1963,43 +2035,46 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg
define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg %rsrc, i32 %soffset.base) {
; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 4092
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret float %val
@@ -2008,43 +2083,46 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg %
define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg %rsrc, i32 %soffset.base) {
; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 4095
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret float %val
@@ -2053,43 +2131,46 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg %
define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg %rsrc, i32 %soffset.base) {
; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 4096
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret float %val
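The three scalar tests above pin down how a constant soffset is split
between the 12-bit MUBUF immediate and the materialized soffset
register: 4092 and 4095 still fit the immediate (soffset stays
S_MOV_B32 0), while 4096 overflows it, so the GFX6/GFX7 checks move
the whole value into soffset (S_MOV_B32 4096, immediate 0) and the
GFX8 checks keep the immediate at its 4095 maximum and carry the
remainder, 1, in soffset. Below is a minimal sketch of that split,
assuming the 4095 cap and treating the remainder placement as a
per-generation policy; splitBufferOffset is a hypothetical helper for
illustration, not the in-tree selector code:

  #include <cstdint>
  #include <utility>

  // Hypothetical sketch: returns {SOffset, ImmOffset} with
  // SOffset + ImmOffset == Offset, under a 4095 immediate cap.
  static std::pair<uint32_t, uint32_t>
  splitBufferOffset(uint32_t Offset, bool KeepImmMax) {
    const uint32_t MaxImm = 4095; // MUBUF 12-bit immediate offset
    if (Offset <= MaxImm)
      return {0, Offset};              // 4092/4095: fold entirely
    if (KeepImmMax)                    // GFX8-style: {1, 4095} for 4096
      return {Offset - MaxImm, MaxImm};
    return {Offset, 0};                // GFX6/7-style: {4096, 0}
  }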
@@ -2099,94 +2180,97 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg %
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> inreg %rsrc, i32 %soffset.base) {
; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX6: $vgpr0 = COPY [[COPY5]]
- ; GFX6: $vgpr1 = COPY [[COPY6]]
- ; GFX6: $vgpr2 = COPY [[COPY7]]
- ; GFX6: $vgpr3 = COPY [[COPY8]]
- ; GFX6: $vgpr4 = COPY [[COPY9]]
- ; GFX6: $vgpr5 = COPY [[COPY10]]
- ; GFX6: $vgpr6 = COPY [[COPY11]]
- ; GFX6: $vgpr7 = COPY [[COPY12]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX7: $vgpr0 = COPY [[COPY5]]
- ; GFX7: $vgpr1 = COPY [[COPY6]]
- ; GFX7: $vgpr2 = COPY [[COPY7]]
- ; GFX7: $vgpr3 = COPY [[COPY8]]
- ; GFX7: $vgpr4 = COPY [[COPY9]]
- ; GFX7: $vgpr5 = COPY [[COPY10]]
- ; GFX7: $vgpr6 = COPY [[COPY11]]
- ; GFX7: $vgpr7 = COPY [[COPY12]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX8: $vgpr0 = COPY [[COPY5]]
- ; GFX8: $vgpr1 = COPY [[COPY6]]
- ; GFX8: $vgpr2 = COPY [[COPY7]]
- ; GFX8: $vgpr3 = COPY [[COPY8]]
- ; GFX8: $vgpr4 = COPY [[COPY9]]
- ; GFX8: $vgpr5 = COPY [[COPY10]]
- ; GFX8: $vgpr6 = COPY [[COPY11]]
- ; GFX8: $vgpr7 = COPY [[COPY12]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%soffset = add i32 %soffset.base, 4064
%val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x float> %val
@@ -2196,94 +2280,97 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32>
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> inreg %rsrc, i32 %soffset.base) {
; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX6: $vgpr0 = COPY [[COPY5]]
- ; GFX6: $vgpr1 = COPY [[COPY6]]
- ; GFX6: $vgpr2 = COPY [[COPY7]]
- ; GFX6: $vgpr3 = COPY [[COPY8]]
- ; GFX6: $vgpr4 = COPY [[COPY9]]
- ; GFX6: $vgpr5 = COPY [[COPY10]]
- ; GFX6: $vgpr6 = COPY [[COPY11]]
- ; GFX6: $vgpr7 = COPY [[COPY12]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX7: $vgpr0 = COPY [[COPY5]]
- ; GFX7: $vgpr1 = COPY [[COPY6]]
- ; GFX7: $vgpr2 = COPY [[COPY7]]
- ; GFX7: $vgpr3 = COPY [[COPY8]]
- ; GFX7: $vgpr4 = COPY [[COPY9]]
- ; GFX7: $vgpr5 = COPY [[COPY10]]
- ; GFX7: $vgpr6 = COPY [[COPY11]]
- ; GFX7: $vgpr7 = COPY [[COPY12]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX8: $vgpr0 = COPY [[COPY5]]
- ; GFX8: $vgpr1 = COPY [[COPY6]]
- ; GFX8: $vgpr2 = COPY [[COPY7]]
- ; GFX8: $vgpr3 = COPY [[COPY8]]
- ; GFX8: $vgpr4 = COPY [[COPY9]]
- ; GFX8: $vgpr5 = COPY [[COPY10]]
- ; GFX8: $vgpr6 = COPY [[COPY11]]
- ; GFX8: $vgpr7 = COPY [[COPY12]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%soffset = add i32 %soffset.base, 4068
%val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x float> %val
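The two <8 x float> tests show the same split interacting with the
128-bit chunking: with a base of 4064 both dwordx4 pieces (immediates
4064 and 4080) fit under the cap, so soffset stays 0 on every target,
while with 4068 the GFX6/GFX7 checks fall back to soffset = 4068 with
chunk immediates 0 and 16, and the GFX8 checks instead keep
soffset = 4 and fold the 16-byte-aligned bulk into the immediates
(4064 and 4080). The sketch above deliberately does not model this
per-chunk alignment constraint; the checks themselves are the
authoritative description of what each generation accepts.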
@@ -2292,148 +2379,151 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32>
define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i32> inreg %rsrc, i32 %soffset.base) {
; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
- ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
- ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
- ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
- ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
- ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
- ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
- ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
- ; GFX6: $vgpr0 = COPY [[COPY5]]
- ; GFX6: $vgpr1 = COPY [[COPY6]]
- ; GFX6: $vgpr2 = COPY [[COPY7]]
- ; GFX6: $vgpr3 = COPY [[COPY8]]
- ; GFX6: $vgpr4 = COPY [[COPY9]]
- ; GFX6: $vgpr5 = COPY [[COPY10]]
- ; GFX6: $vgpr6 = COPY [[COPY11]]
- ; GFX6: $vgpr7 = COPY [[COPY12]]
- ; GFX6: $vgpr8 = COPY [[COPY13]]
- ; GFX6: $vgpr9 = COPY [[COPY14]]
- ; GFX6: $vgpr10 = COPY [[COPY15]]
- ; GFX6: $vgpr11 = COPY [[COPY16]]
- ; GFX6: $vgpr12 = COPY [[COPY17]]
- ; GFX6: $vgpr13 = COPY [[COPY18]]
- ; GFX6: $vgpr14 = COPY [[COPY19]]
- ; GFX6: $vgpr15 = COPY [[COPY20]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]]
+ ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]]
+ ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]]
+ ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]]
+ ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]]
+ ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]]
+ ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]]
+ ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
- ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
- ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
- ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
- ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
- ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
- ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
- ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
- ; GFX7: $vgpr0 = COPY [[COPY5]]
- ; GFX7: $vgpr1 = COPY [[COPY6]]
- ; GFX7: $vgpr2 = COPY [[COPY7]]
- ; GFX7: $vgpr3 = COPY [[COPY8]]
- ; GFX7: $vgpr4 = COPY [[COPY9]]
- ; GFX7: $vgpr5 = COPY [[COPY10]]
- ; GFX7: $vgpr6 = COPY [[COPY11]]
- ; GFX7: $vgpr7 = COPY [[COPY12]]
- ; GFX7: $vgpr8 = COPY [[COPY13]]
- ; GFX7: $vgpr9 = COPY [[COPY14]]
- ; GFX7: $vgpr10 = COPY [[COPY15]]
- ; GFX7: $vgpr11 = COPY [[COPY16]]
- ; GFX7: $vgpr12 = COPY [[COPY17]]
- ; GFX7: $vgpr13 = COPY [[COPY18]]
- ; GFX7: $vgpr14 = COPY [[COPY19]]
- ; GFX7: $vgpr15 = COPY [[COPY20]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]]
+ ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]]
+ ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]]
+ ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]]
+ ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]]
+ ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]]
+ ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]]
+ ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
- ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
- ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
- ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
- ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
- ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
- ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
- ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
- ; GFX8: $vgpr0 = COPY [[COPY5]]
- ; GFX8: $vgpr1 = COPY [[COPY6]]
- ; GFX8: $vgpr2 = COPY [[COPY7]]
- ; GFX8: $vgpr3 = COPY [[COPY8]]
- ; GFX8: $vgpr4 = COPY [[COPY9]]
- ; GFX8: $vgpr5 = COPY [[COPY10]]
- ; GFX8: $vgpr6 = COPY [[COPY11]]
- ; GFX8: $vgpr7 = COPY [[COPY12]]
- ; GFX8: $vgpr8 = COPY [[COPY13]]
- ; GFX8: $vgpr9 = COPY [[COPY14]]
- ; GFX8: $vgpr10 = COPY [[COPY15]]
- ; GFX8: $vgpr11 = COPY [[COPY16]]
- ; GFX8: $vgpr12 = COPY [[COPY17]]
- ; GFX8: $vgpr13 = COPY [[COPY18]]
- ; GFX8: $vgpr14 = COPY [[COPY19]]
- ; GFX8: $vgpr15 = COPY [[COPY20]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]]
+ ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]]
+ ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]]
+ ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]]
+ ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]]
+ ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]]
+ ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]]
+ ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
%soffset = add i32 %soffset.base, 4032
%val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <16 x float> %val
@@ -2442,148 +2532,151 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3
define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i32> inreg %rsrc, i32 %soffset.base) {
; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
- ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
- ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
- ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
- ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
- ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
- ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
- ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
- ; GFX6: $vgpr0 = COPY [[COPY5]]
- ; GFX6: $vgpr1 = COPY [[COPY6]]
- ; GFX6: $vgpr2 = COPY [[COPY7]]
- ; GFX6: $vgpr3 = COPY [[COPY8]]
- ; GFX6: $vgpr4 = COPY [[COPY9]]
- ; GFX6: $vgpr5 = COPY [[COPY10]]
- ; GFX6: $vgpr6 = COPY [[COPY11]]
- ; GFX6: $vgpr7 = COPY [[COPY12]]
- ; GFX6: $vgpr8 = COPY [[COPY13]]
- ; GFX6: $vgpr9 = COPY [[COPY14]]
- ; GFX6: $vgpr10 = COPY [[COPY15]]
- ; GFX6: $vgpr11 = COPY [[COPY16]]
- ; GFX6: $vgpr12 = COPY [[COPY17]]
- ; GFX6: $vgpr13 = COPY [[COPY18]]
- ; GFX6: $vgpr14 = COPY [[COPY19]]
- ; GFX6: $vgpr15 = COPY [[COPY20]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]]
+ ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]]
+ ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]]
+ ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]]
+ ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]]
+ ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]]
+ ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]]
+ ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
- ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
- ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
- ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
- ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
- ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
- ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
- ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
- ; GFX7: $vgpr0 = COPY [[COPY5]]
- ; GFX7: $vgpr1 = COPY [[COPY6]]
- ; GFX7: $vgpr2 = COPY [[COPY7]]
- ; GFX7: $vgpr3 = COPY [[COPY8]]
- ; GFX7: $vgpr4 = COPY [[COPY9]]
- ; GFX7: $vgpr5 = COPY [[COPY10]]
- ; GFX7: $vgpr6 = COPY [[COPY11]]
- ; GFX7: $vgpr7 = COPY [[COPY12]]
- ; GFX7: $vgpr8 = COPY [[COPY13]]
- ; GFX7: $vgpr9 = COPY [[COPY14]]
- ; GFX7: $vgpr10 = COPY [[COPY15]]
- ; GFX7: $vgpr11 = COPY [[COPY16]]
- ; GFX7: $vgpr12 = COPY [[COPY17]]
- ; GFX7: $vgpr13 = COPY [[COPY18]]
- ; GFX7: $vgpr14 = COPY [[COPY19]]
- ; GFX7: $vgpr15 = COPY [[COPY20]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]]
+ ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]]
+ ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]]
+ ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]]
+ ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]]
+ ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]]
+ ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]]
+ ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
- ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
- ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
- ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
- ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
- ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
- ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
- ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
- ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
- ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
- ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
- ; GFX8: $vgpr0 = COPY [[COPY5]]
- ; GFX8: $vgpr1 = COPY [[COPY6]]
- ; GFX8: $vgpr2 = COPY [[COPY7]]
- ; GFX8: $vgpr3 = COPY [[COPY8]]
- ; GFX8: $vgpr4 = COPY [[COPY9]]
- ; GFX8: $vgpr5 = COPY [[COPY10]]
- ; GFX8: $vgpr6 = COPY [[COPY11]]
- ; GFX8: $vgpr7 = COPY [[COPY12]]
- ; GFX8: $vgpr8 = COPY [[COPY13]]
- ; GFX8: $vgpr9 = COPY [[COPY14]]
- ; GFX8: $vgpr10 = COPY [[COPY15]]
- ; GFX8: $vgpr11 = COPY [[COPY16]]
- ; GFX8: $vgpr12 = COPY [[COPY17]]
- ; GFX8: $vgpr13 = COPY [[COPY18]]
- ; GFX8: $vgpr14 = COPY [[COPY19]]
- ; GFX8: $vgpr15 = COPY [[COPY20]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4)
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7
+ ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8
+ ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9
+ ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10
+ ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11
+ ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12
+ ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13
+ ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14
+ ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]]
+ ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]]
+ ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]]
+ ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]]
+ ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]]
+ ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]]
+ ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]]
+ ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]]
+ ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
%soffset = add i32 %soffset.base, 4036
%val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <16 x float> %val
@@ -2593,109 +2686,124 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3
define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %soffset) {
; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX6: bb.2:
- ; GFX6: successors: %bb.3, %bb.2
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
- ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX6: bb.3:
- ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX6: bb.4:
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.2:
+ ; GFX6-NEXT: successors: %bb.3, %bb.2
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.3:
+ ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX7: bb.2:
- ; GFX7: successors: %bb.3, %bb.2
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
- ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX7: bb.3:
- ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX7: bb.4:
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.2:
+ ; GFX7-NEXT: successors: %bb.3, %bb.2
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.3:
+ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX8: bb.2:
- ; GFX8: successors: %bb.3, %bb.2
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
- ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX8: bb.3:
- ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX8: bb.4:
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.2:
+ ; GFX8-NEXT: successors: %bb.3, %bb.2
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.3:
+ ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret float %val
}
@@ -2704,103 +2812,118 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %
define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %rsrc, i32 inreg %soffset.base) {
; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX6: bb.2:
- ; GFX6: successors: %bb.3, %bb.2
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
- ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX6: bb.3:
- ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX6: bb.4:
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.2:
+ ; GFX6-NEXT: successors: %bb.3, %bb.2
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.3:
+ ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX7: bb.2:
- ; GFX7: successors: %bb.3, %bb.2
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
- ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX7: bb.3:
- ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX7: bb.4:
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.2:
+ ; GFX7-NEXT: successors: %bb.3, %bb.2
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.3:
+ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX8: bb.2:
- ; GFX8: successors: %bb.3, %bb.2
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
- ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX8: bb.3:
- ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX8: bb.4:
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.2:
+ ; GFX8-NEXT: successors: %bb.3, %bb.2
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.3:
+ ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 4092
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret float %val
@@ -2810,115 +2933,130 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %
define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) {
; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
- ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX6: bb.2:
- ; GFX6: successors: %bb.3, %bb.2
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
- ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX6: bb.3:
- ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX6: bb.4:
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.2:
+ ; GFX6-NEXT: successors: %bb.3, %bb.2
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.3:
+ ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
- ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX7: bb.2:
- ; GFX7: successors: %bb.3, %bb.2
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
- ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX7: bb.3:
- ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX7: bb.4:
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.2:
+ ; GFX7-NEXT: successors: %bb.3, %bb.2
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.3:
+ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
- ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX8: bb.2:
- ; GFX8: successors: %bb.3, %bb.2
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
- ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX8: bb.3:
- ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX8: bb.4:
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.2:
+ ; GFX8-NEXT: successors: %bb.3, %bb.2
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.3:
+ ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 4096
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret float %val
@@ -2928,103 +3066,118 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %
define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) {
; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX6: bb.2:
- ; GFX6: successors: %bb.3, %bb.2
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
- ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
- ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX6: bb.3:
- ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX6: bb.4:
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.2:
+ ; GFX6-NEXT: successors: %bb.3, %bb.2
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
+ ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.3:
+ ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX7: bb.2:
- ; GFX7: successors: %bb.3, %bb.2
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
- ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
- ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX7: bb.3:
- ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX7: bb.4:
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.2:
+ ; GFX7-NEXT: successors: %bb.3, %bb.2
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
+ ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.3:
+ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX8: bb.2:
- ; GFX8: successors: %bb.3, %bb.2
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
- ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
- ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX8: bb.3:
- ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX8: bb.4:
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.2:
+ ; GFX8-NEXT: successors: %bb.3, %bb.2
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
+ ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.3:
+ ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0)
ret float %val
}
@@ -3033,107 +3186,122 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc)
define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) {
; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX6: bb.2:
- ; GFX6: successors: %bb.3, %bb.2
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
- ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX6: bb.3:
- ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX6: bb.4:
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.2:
+ ; GFX6-NEXT: successors: %bb.3, %bb.2
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.3:
+ ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX7: bb.2:
- ; GFX7: successors: %bb.3, %bb.2
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
- ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX7: bb.3:
- ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX7: bb.4:
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.2:
+ ; GFX7-NEXT: successors: %bb.3, %bb.2
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.3:
+ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX8: bb.2:
- ; GFX8: successors: %bb.3, %bb.2
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
- ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096)
- ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX8: bb.3:
- ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX8: bb.4:
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.2:
+ ; GFX8-NEXT: successors: %bb.3, %bb.2
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096)
+ ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.3:
+ ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0)
ret float %val
}
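
The checks above all regenerate the same waterfall-loop shape: when the <4 x i32> resource descriptor of llvm.amdgcn.s.buffer.load arrives in VGPRs, selection reads back each 64-bit half with V_READFIRSTLANE_B32, compares it against the live value, and loops via SI_WATERFALL_LOOP until every active lane has been serviced; the new lines only move the checks to the -NEXT style with {{ $}} block separators. As a reference point, here is a minimal sketch of the kind of input that produces this pattern, assuming the function name @waterfall_reduction is purely illustrative and not from the test file:

; Divergent (VGPR) resource descriptor: the scalar buffer load must be
; wrapped in a waterfall loop, as checked in the blocks above.
declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg)

define amdgpu_ps float @waterfall_reduction(<4 x i32> %rsrc) {
  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0)
  ret float %val
}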
@@ -3143,154 +3311,169 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc)
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %rsrc, i32 inreg %soffset.base) {
; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX6: bb.2:
- ; GFX6: successors: %bb.3, %bb.2
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
- ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX6: bb.3:
- ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX6: bb.4:
- ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX6: $vgpr0 = COPY [[COPY7]]
- ; GFX6: $vgpr1 = COPY [[COPY8]]
- ; GFX6: $vgpr2 = COPY [[COPY9]]
- ; GFX6: $vgpr3 = COPY [[COPY10]]
- ; GFX6: $vgpr4 = COPY [[COPY11]]
- ; GFX6: $vgpr5 = COPY [[COPY12]]
- ; GFX6: $vgpr6 = COPY [[COPY13]]
- ; GFX6: $vgpr7 = COPY [[COPY14]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.2:
+ ; GFX6-NEXT: successors: %bb.3, %bb.2
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.3:
+ ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX7: bb.2:
- ; GFX7: successors: %bb.3, %bb.2
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
- ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX7: bb.3:
- ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX7: bb.4:
- ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX7: $vgpr0 = COPY [[COPY7]]
- ; GFX7: $vgpr1 = COPY [[COPY8]]
- ; GFX7: $vgpr2 = COPY [[COPY9]]
- ; GFX7: $vgpr3 = COPY [[COPY10]]
- ; GFX7: $vgpr4 = COPY [[COPY11]]
- ; GFX7: $vgpr5 = COPY [[COPY12]]
- ; GFX7: $vgpr6 = COPY [[COPY13]]
- ; GFX7: $vgpr7 = COPY [[COPY14]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.2:
+ ; GFX7-NEXT: successors: %bb.3, %bb.2
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.3:
+ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX8: bb.2:
- ; GFX8: successors: %bb.3, %bb.2
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
- ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX8: bb.3:
- ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX8: bb.4:
- ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX8: $vgpr0 = COPY [[COPY7]]
- ; GFX8: $vgpr1 = COPY [[COPY8]]
- ; GFX8: $vgpr2 = COPY [[COPY9]]
- ; GFX8: $vgpr3 = COPY [[COPY10]]
- ; GFX8: $vgpr4 = COPY [[COPY11]]
- ; GFX8: $vgpr5 = COPY [[COPY12]]
- ; GFX8: $vgpr6 = COPY [[COPY13]]
- ; GFX8: $vgpr7 = COPY [[COPY14]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.2:
+ ; GFX8-NEXT: successors: %bb.3, %bb.2
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.3:
+ ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%soffset = add i32 %soffset.base, 4064
%val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x float> %val
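
For contrast with the next hunk: at %soffset.base + 4064, both halves of the split v8f32 load keep the constant in the MUBUF immediate offset field (BUFFER_LOAD_DWORDX4_OFFSET with offsets 4064 and 4080), while at + 4068 the checks below materialize the addition with S_ADD_I32 into a VGPR and select BUFFER_LOAD_DWORDX4_OFFEN with immediates 0 and 16. The two tests appear deliberately chosen to sit on either side of the boundary that the immediate-offset folding accepts for the split halves.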
@@ -3301,166 +3484,181 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %rsrc, i32 inreg %soffset.base) {
; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
- ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
- ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX6: bb.2:
- ; GFX6: successors: %bb.3, %bb.2
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
- ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX6: bb.3:
- ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX6: bb.4:
- ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX6: $vgpr0 = COPY [[COPY8]]
- ; GFX6: $vgpr1 = COPY [[COPY9]]
- ; GFX6: $vgpr2 = COPY [[COPY10]]
- ; GFX6: $vgpr3 = COPY [[COPY11]]
- ; GFX6: $vgpr4 = COPY [[COPY12]]
- ; GFX6: $vgpr5 = COPY [[COPY13]]
- ; GFX6: $vgpr6 = COPY [[COPY14]]
- ; GFX6: $vgpr7 = COPY [[COPY15]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
+ ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.2:
+ ; GFX6-NEXT: successors: %bb.3, %bb.2
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.3:
+ ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY8]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY9]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY12]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY13]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY14]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY15]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
- ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
- ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX7: bb.2:
- ; GFX7: successors: %bb.3, %bb.2
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
- ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX7: bb.3:
- ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX7: bb.4:
- ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX7: $vgpr0 = COPY [[COPY8]]
- ; GFX7: $vgpr1 = COPY [[COPY9]]
- ; GFX7: $vgpr2 = COPY [[COPY10]]
- ; GFX7: $vgpr3 = COPY [[COPY11]]
- ; GFX7: $vgpr4 = COPY [[COPY12]]
- ; GFX7: $vgpr5 = COPY [[COPY13]]
- ; GFX7: $vgpr6 = COPY [[COPY14]]
- ; GFX7: $vgpr7 = COPY [[COPY15]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
+ ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.2:
+ ; GFX7-NEXT: successors: %bb.3, %bb.2
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.3:
+ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY8]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY12]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY13]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY14]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY15]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
- ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
- ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX8: bb.2:
- ; GFX8: successors: %bb.3, %bb.2
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
- ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX8: bb.3:
- ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX8: bb.4:
- ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX8: $vgpr0 = COPY [[COPY8]]
- ; GFX8: $vgpr1 = COPY [[COPY9]]
- ; GFX8: $vgpr2 = COPY [[COPY10]]
- ; GFX8: $vgpr3 = COPY [[COPY11]]
- ; GFX8: $vgpr4 = COPY [[COPY12]]
- ; GFX8: $vgpr5 = COPY [[COPY13]]
- ; GFX8: $vgpr6 = COPY [[COPY14]]
- ; GFX8: $vgpr7 = COPY [[COPY15]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068
+ ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.2:
+ ; GFX8-NEXT: successors: %bb.3, %bb.2
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.3:
+ ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY9]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY12]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY13]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY14]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY15]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%soffset = add i32 %soffset.base, 4068
%val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x float> %val
@@ -3469,166 +3667,181 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) {
; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
- ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX6: bb.2:
- ; GFX6: successors: %bb.3, %bb.2
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
- ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX6: bb.3:
- ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX6: bb.4:
- ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX6: $vgpr0 = COPY [[COPY8]]
- ; GFX6: $vgpr1 = COPY [[COPY9]]
- ; GFX6: $vgpr2 = COPY [[COPY10]]
- ; GFX6: $vgpr3 = COPY [[COPY11]]
- ; GFX6: $vgpr4 = COPY [[COPY12]]
- ; GFX6: $vgpr5 = COPY [[COPY13]]
- ; GFX6: $vgpr6 = COPY [[COPY14]]
- ; GFX6: $vgpr7 = COPY [[COPY15]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.2:
+ ; GFX6-NEXT: successors: %bb.3, %bb.2
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.3:
+ ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY8]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY9]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY12]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY13]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY14]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY15]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
- ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX7: bb.2:
- ; GFX7: successors: %bb.3, %bb.2
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
- ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX7: bb.3:
- ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX7: bb.4:
- ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX7: $vgpr0 = COPY [[COPY8]]
- ; GFX7: $vgpr1 = COPY [[COPY9]]
- ; GFX7: $vgpr2 = COPY [[COPY10]]
- ; GFX7: $vgpr3 = COPY [[COPY11]]
- ; GFX7: $vgpr4 = COPY [[COPY12]]
- ; GFX7: $vgpr5 = COPY [[COPY13]]
- ; GFX7: $vgpr6 = COPY [[COPY14]]
- ; GFX7: $vgpr7 = COPY [[COPY15]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.2:
+ ; GFX7-NEXT: successors: %bb.3, %bb.2
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.3:
+ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY8]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY12]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY13]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY14]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY15]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
- ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
- ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX8: bb.2:
- ; GFX8: successors: %bb.3, %bb.2
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
- ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX8: bb.3:
- ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX8: bb.4:
- ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX8: $vgpr0 = COPY [[COPY8]]
- ; GFX8: $vgpr1 = COPY [[COPY9]]
- ; GFX8: $vgpr2 = COPY [[COPY10]]
- ; GFX8: $vgpr3 = COPY [[COPY11]]
- ; GFX8: $vgpr4 = COPY [[COPY12]]
- ; GFX8: $vgpr5 = COPY [[COPY13]]
- ; GFX8: $vgpr6 = COPY [[COPY14]]
- ; GFX8: $vgpr7 = COPY [[COPY15]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.2:
+ ; GFX8-NEXT: successors: %bb.3, %bb.2
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.3:
+ ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY9]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY12]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY13]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY14]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY15]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%soffset = add i32 %soffset.base, 4096
%val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x float> %val
@@ -3637,157 +3850,172 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000(<4 x i32> %rsrc, i32 %offset.base) {
; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
- ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
- ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX6: bb.2:
- ; GFX6: successors: %bb.3, %bb.2
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
- ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX6: bb.3:
- ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX6: bb.4:
- ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX6: $vgpr0 = COPY [[COPY7]]
- ; GFX6: $vgpr1 = COPY [[COPY8]]
- ; GFX6: $vgpr2 = COPY [[COPY9]]
- ; GFX6: $vgpr3 = COPY [[COPY10]]
- ; GFX6: $vgpr4 = COPY [[COPY11]]
- ; GFX6: $vgpr5 = COPY [[COPY12]]
- ; GFX6: $vgpr6 = COPY [[COPY13]]
- ; GFX6: $vgpr7 = COPY [[COPY14]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.2:
+ ; GFX6-NEXT: successors: %bb.3, %bb.2
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.3:
+ ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
- ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
- ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX7: bb.2:
- ; GFX7: successors: %bb.3, %bb.2
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
- ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX7: bb.3:
- ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX7: bb.4:
- ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX7: $vgpr0 = COPY [[COPY7]]
- ; GFX7: $vgpr1 = COPY [[COPY8]]
- ; GFX7: $vgpr2 = COPY [[COPY9]]
- ; GFX7: $vgpr3 = COPY [[COPY10]]
- ; GFX7: $vgpr4 = COPY [[COPY11]]
- ; GFX7: $vgpr5 = COPY [[COPY12]]
- ; GFX7: $vgpr6 = COPY [[COPY13]]
- ; GFX7: $vgpr7 = COPY [[COPY14]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.2:
+ ; GFX7-NEXT: successors: %bb.3, %bb.2
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.3:
+ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064
- ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX8: bb.2:
- ; GFX8: successors: %bb.3, %bb.2
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
- ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX8: bb.3:
- ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX8: bb.4:
- ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX8: $vgpr0 = COPY [[COPY7]]
- ; GFX8: $vgpr1 = COPY [[COPY8]]
- ; GFX8: $vgpr2 = COPY [[COPY9]]
- ; GFX8: $vgpr3 = COPY [[COPY10]]
- ; GFX8: $vgpr4 = COPY [[COPY11]]
- ; GFX8: $vgpr5 = COPY [[COPY12]]
- ; GFX8: $vgpr6 = COPY [[COPY13]]
- ; GFX8: $vgpr7 = COPY [[COPY14]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.2:
+ ; GFX8-NEXT: successors: %bb.3, %bb.2
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.3:
+ ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%soffset = add i32 %offset.base, 5000
%val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x float> %val
@@ -3796,157 +4024,172 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076(<4 x i32> %rsrc, i32 %offset.base) {
; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
- ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076
- ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX6: bb.2:
- ; GFX6: successors: %bb.3, %bb.2
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
- ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX6: bb.3:
- ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX6: bb.4:
- ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX6: $vgpr0 = COPY [[COPY7]]
- ; GFX6: $vgpr1 = COPY [[COPY8]]
- ; GFX6: $vgpr2 = COPY [[COPY9]]
- ; GFX6: $vgpr3 = COPY [[COPY10]]
- ; GFX6: $vgpr4 = COPY [[COPY11]]
- ; GFX6: $vgpr5 = COPY [[COPY12]]
- ; GFX6: $vgpr6 = COPY [[COPY13]]
- ; GFX6: $vgpr7 = COPY [[COPY14]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.2:
+ ; GFX6-NEXT: successors: %bb.3, %bb.2
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.3:
+ ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
- ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076
- ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX7: bb.2:
- ; GFX7: successors: %bb.3, %bb.2
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
- ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX7: bb.3:
- ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX7: bb.4:
- ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX7: $vgpr0 = COPY [[COPY7]]
- ; GFX7: $vgpr1 = COPY [[COPY8]]
- ; GFX7: $vgpr2 = COPY [[COPY9]]
- ; GFX7: $vgpr3 = COPY [[COPY10]]
- ; GFX7: $vgpr4 = COPY [[COPY11]]
- ; GFX7: $vgpr5 = COPY [[COPY12]]
- ; GFX7: $vgpr6 = COPY [[COPY13]]
- ; GFX7: $vgpr7 = COPY [[COPY14]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.2:
+ ; GFX7-NEXT: successors: %bb.3, %bb.2
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.3:
+ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12
- ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX8: bb.2:
- ; GFX8: successors: %bb.3, %bb.2
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
- ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX8: bb.3:
- ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX8: bb.4:
- ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX8: $vgpr0 = COPY [[COPY7]]
- ; GFX8: $vgpr1 = COPY [[COPY8]]
- ; GFX8: $vgpr2 = COPY [[COPY9]]
- ; GFX8: $vgpr3 = COPY [[COPY10]]
- ; GFX8: $vgpr4 = COPY [[COPY11]]
- ; GFX8: $vgpr5 = COPY [[COPY12]]
- ; GFX8: $vgpr6 = COPY [[COPY13]]
- ; GFX8: $vgpr7 = COPY [[COPY14]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.2:
+ ; GFX8-NEXT: successors: %bb.3, %bb.2
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.3:
+ ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%soffset = add i32 %offset.base, 4076
%val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x float> %val
@@ -3955,157 +4198,172 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080(<4 x i32> %rsrc, i32 %offset.base) {
; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
- ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080
- ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX6: bb.2:
- ; GFX6: successors: %bb.3, %bb.2
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
- ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX6: bb.3:
- ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX6: bb.4:
- ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX6: $vgpr0 = COPY [[COPY7]]
- ; GFX6: $vgpr1 = COPY [[COPY8]]
- ; GFX6: $vgpr2 = COPY [[COPY9]]
- ; GFX6: $vgpr3 = COPY [[COPY10]]
- ; GFX6: $vgpr4 = COPY [[COPY11]]
- ; GFX6: $vgpr5 = COPY [[COPY12]]
- ; GFX6: $vgpr6 = COPY [[COPY13]]
- ; GFX6: $vgpr7 = COPY [[COPY14]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.2:
+ ; GFX6-NEXT: successors: %bb.3, %bb.2
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.3:
+ ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
- ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080
- ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX7: bb.2:
- ; GFX7: successors: %bb.3, %bb.2
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
- ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX7: bb.3:
- ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX7: bb.4:
- ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX7: $vgpr0 = COPY [[COPY7]]
- ; GFX7: $vgpr1 = COPY [[COPY8]]
- ; GFX7: $vgpr2 = COPY [[COPY9]]
- ; GFX7: $vgpr3 = COPY [[COPY10]]
- ; GFX7: $vgpr4 = COPY [[COPY11]]
- ; GFX7: $vgpr5 = COPY [[COPY12]]
- ; GFX7: $vgpr6 = COPY [[COPY13]]
- ; GFX7: $vgpr7 = COPY [[COPY14]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.2:
+ ; GFX7-NEXT: successors: %bb.3, %bb.2
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.3:
+ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
- ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX8: bb.2:
- ; GFX8: successors: %bb.3, %bb.2
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
- ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX8: bb.3:
- ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX8: bb.4:
- ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX8: $vgpr0 = COPY [[COPY7]]
- ; GFX8: $vgpr1 = COPY [[COPY8]]
- ; GFX8: $vgpr2 = COPY [[COPY9]]
- ; GFX8: $vgpr3 = COPY [[COPY10]]
- ; GFX8: $vgpr4 = COPY [[COPY11]]
- ; GFX8: $vgpr5 = COPY [[COPY12]]
- ; GFX8: $vgpr6 = COPY [[COPY13]]
- ; GFX8: $vgpr7 = COPY [[COPY14]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.2:
+ ; GFX8-NEXT: successors: %bb.3, %bb.2
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
+ ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.3:
+ ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%soffset = add i32 %offset.base, 4080
%val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x float> %val
@@ -4114,154 +4372,169 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064(<4 x i32> %rsrc, i32 %offset.base) {
; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX6: bb.2:
- ; GFX6: successors: %bb.3, %bb.2
- ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
- ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
- ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
- ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX6: bb.3:
- ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX6: bb.4:
- ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX6: $vgpr0 = COPY [[COPY6]]
- ; GFX6: $vgpr1 = COPY [[COPY7]]
- ; GFX6: $vgpr2 = COPY [[COPY8]]
- ; GFX6: $vgpr3 = COPY [[COPY9]]
- ; GFX6: $vgpr4 = COPY [[COPY10]]
- ; GFX6: $vgpr5 = COPY [[COPY11]]
- ; GFX6: $vgpr6 = COPY [[COPY12]]
- ; GFX6: $vgpr7 = COPY [[COPY13]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.2:
+ ; GFX6-NEXT: successors: %bb.3, %bb.2
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
+ ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.3:
+ ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: bb.4:
+ ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX6-NEXT: $vgpr0 = COPY [[COPY6]]
+ ; GFX6-NEXT: $vgpr1 = COPY [[COPY7]]
+ ; GFX6-NEXT: $vgpr2 = COPY [[COPY8]]
+ ; GFX6-NEXT: $vgpr3 = COPY [[COPY9]]
+ ; GFX6-NEXT: $vgpr4 = COPY [[COPY10]]
+ ; GFX6-NEXT: $vgpr5 = COPY [[COPY11]]
+ ; GFX6-NEXT: $vgpr6 = COPY [[COPY12]]
+ ; GFX6-NEXT: $vgpr7 = COPY [[COPY13]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX7: bb.2:
- ; GFX7: successors: %bb.3, %bb.2
- ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
- ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
- ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
- ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX7: bb.3:
- ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX7: bb.4:
- ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX7: $vgpr0 = COPY [[COPY6]]
- ; GFX7: $vgpr1 = COPY [[COPY7]]
- ; GFX7: $vgpr2 = COPY [[COPY8]]
- ; GFX7: $vgpr3 = COPY [[COPY9]]
- ; GFX7: $vgpr4 = COPY [[COPY10]]
- ; GFX7: $vgpr5 = COPY [[COPY11]]
- ; GFX7: $vgpr6 = COPY [[COPY12]]
- ; GFX7: $vgpr7 = COPY [[COPY13]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.2:
+ ; GFX7-NEXT: successors: %bb.3, %bb.2
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
+ ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.3:
+ ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: bb.4:
+ ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX7-NEXT: $vgpr0 = COPY [[COPY6]]
+ ; GFX7-NEXT: $vgpr1 = COPY [[COPY7]]
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY8]]
+ ; GFX7-NEXT: $vgpr3 = COPY [[COPY9]]
+ ; GFX7-NEXT: $vgpr4 = COPY [[COPY10]]
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY11]]
+ ; GFX7-NEXT: $vgpr6 = COPY [[COPY12]]
+ ; GFX7-NEXT: $vgpr7 = COPY [[COPY13]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
- ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
- ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; GFX8: bb.2:
- ; GFX8: successors: %bb.3, %bb.2
- ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
- ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
- ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
- ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
- ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
- ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
- ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
- ; GFX8: bb.3:
- ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; GFX8: bb.4:
- ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
- ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
- ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
- ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
- ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
- ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
- ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
- ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
- ; GFX8: $vgpr0 = COPY [[COPY6]]
- ; GFX8: $vgpr1 = COPY [[COPY7]]
- ; GFX8: $vgpr2 = COPY [[COPY8]]
- ; GFX8: $vgpr3 = COPY [[COPY9]]
- ; GFX8: $vgpr4 = COPY [[COPY10]]
- ; GFX8: $vgpr5 = COPY [[COPY11]]
- ; GFX8: $vgpr6 = COPY [[COPY12]]
- ; GFX8: $vgpr7 = COPY [[COPY13]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
+ ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.2:
+ ; GFX8-NEXT: successors: %bb.3, %bb.2
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec
+ ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec
+ ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+ ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
+ ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.3:
+ ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: bb.4:
+ ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2
+ ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3
+ ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4
+ ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5
+ ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6
+ ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7
+ ; GFX8-NEXT: $vgpr0 = COPY [[COPY6]]
+ ; GFX8-NEXT: $vgpr1 = COPY [[COPY7]]
+ ; GFX8-NEXT: $vgpr2 = COPY [[COPY8]]
+ ; GFX8-NEXT: $vgpr3 = COPY [[COPY9]]
+ ; GFX8-NEXT: $vgpr4 = COPY [[COPY10]]
+ ; GFX8-NEXT: $vgpr5 = COPY [[COPY11]]
+ ; GFX8-NEXT: $vgpr6 = COPY [[COPY12]]
+ ; GFX8-NEXT: $vgpr7 = COPY [[COPY13]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0)
ret <8 x float> %val
}
@@ -4269,43 +4542,46 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4
define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset = add i32 %offset.v, %offset.s
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
ret float %val
@@ -4314,43 +4590,46 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg %
define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset = add i32 %offset.s, %offset.v
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
ret float %val
@@ -4359,52 +4638,55 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg %
define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX6-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX7-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX8-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset.base = add i32 %offset.v, %offset.s
%offset = add i32 %offset.base, 1024
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
@@ -4414,52 +4696,55 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr
define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX6-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX7-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GFX8-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset.base = add i32 %offset.s, %offset.v
%offset = add i32 %offset.base, 1024
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
@@ -4470,49 +4755,52 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr
define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
- ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
- ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
- ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset.base = add i32 %offset.s, 1024
%offset = add i32 %offset.base, %offset.v
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
@@ -4522,52 +4810,55 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr
define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) {
; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
; GFX6: bb.1 (%ir-block.0):
- ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
- ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX6: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX6-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
; GFX7: bb.1 (%ir-block.0):
- ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
- ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX7: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX7-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
; GFX8: bb.1 (%ir-block.0):
- ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
- ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
- ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX8: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
- ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
- ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX8-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset.base = add i32 %offset.v, 1024
%offset = add i32 %offset.base, %offset.s
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
index c99da17fe964e..98bc9815f001d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll
@@ -530,20 +530,20 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(<3 x i32> addrspace(4)*
define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)* inreg %ptr) {
; GFX9-LABEL: s_load_constant_v3i32_align4:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s4, s0
-; GFX9-NEXT: s_mov_b32 s5, s1
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s0, s4
+; GFX9-NEXT: s_mov_b32 s1, s5
; GFX9-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_load_constant_v3i32_align4:
; GFX7: ; %bb.0:
-; GFX7-NEXT: s_mov_b32 s2, s0
-; GFX7-NEXT: s_mov_b32 s3, s1
-; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
-; GFX7-NEXT: s_load_dword s2, s[2:3], 0x2
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_mov_b32 s0, s4
+; GFX7-NEXT: s_mov_b32 s1, s5
; GFX7-NEXT: ; return to shader part epilog
%load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 4
ret <3 x i32> %load
@@ -552,20 +552,20 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)*
define amdgpu_ps i96 @s_load_constant_i96_align8(i96 addrspace(4)* inreg %ptr) {
; GFX9-LABEL: s_load_constant_i96_align8:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s4, s0
-; GFX9-NEXT: s_mov_b32 s5, s1
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s0, s4
+; GFX9-NEXT: s_mov_b32 s1, s5
; GFX9-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_load_constant_i96_align8:
; GFX7: ; %bb.0:
-; GFX7-NEXT: s_mov_b32 s2, s0
-; GFX7-NEXT: s_mov_b32 s3, s1
-; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
-; GFX7-NEXT: s_load_dword s2, s[2:3], 0x2
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_mov_b32 s0, s4
+; GFX7-NEXT: s_mov_b32 s1, s5
; GFX7-NEXT: ; return to shader part epilog
%load = load i96, i96 addrspace(4)* %ptr, align 8
ret i96 %load
@@ -574,20 +574,20 @@ define amdgpu_ps i96 @s_load_constant_i96_align8(i96 addrspace(4)* inreg %ptr) {
define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(<3 x i32> addrspace(4)* inreg %ptr) {
; GFX9-LABEL: s_load_constant_v3i32_align8:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s4, s0
-; GFX9-NEXT: s_mov_b32 s5, s1
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s0, s4
+; GFX9-NEXT: s_mov_b32 s1, s5
; GFX9-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_load_constant_v3i32_align8:
; GFX7: ; %bb.0:
-; GFX7-NEXT: s_mov_b32 s2, s0
-; GFX7-NEXT: s_mov_b32 s3, s1
-; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
-; GFX7-NEXT: s_load_dword s2, s[2:3], 0x2
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_mov_b32 s0, s4
+; GFX7-NEXT: s_mov_b32 s1, s5
; GFX7-NEXT: ; return to shader part epilog
%load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 8
ret <3 x i32> %load
@@ -596,20 +596,20 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(<3 x i32> addrspace(4)*
define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(<6 x i16> addrspace(4)* inreg %ptr) {
; GFX9-LABEL: s_load_constant_v6i16_align8:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s4, s0
-; GFX9-NEXT: s_mov_b32 s5, s1
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s0, s4
+; GFX9-NEXT: s_mov_b32 s1, s5
; GFX9-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_load_constant_v6i16_align8:
; GFX7: ; %bb.0:
-; GFX7-NEXT: s_mov_b32 s2, s0
-; GFX7-NEXT: s_mov_b32 s3, s1
-; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
-; GFX7-NEXT: s_load_dword s2, s[2:3], 0x2
+; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_mov_b32 s0, s4
+; GFX7-NEXT: s_mov_b32 s1, s5
; GFX7-NEXT: ; return to shader part epilog
%load = load <6 x i16>, <6 x i16> addrspace(4)* %ptr, align 8
%cast = bitcast <6 x i16> %load to <3 x i32>
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
index 169507619a128..c11a19481949d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
@@ -93,12 +93,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[BITCAST]](s384)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
@@ -120,12 +115,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg
; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
- ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>)
- ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>)
- ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[BITCAST]](s384)
- ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96)
- ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>)
+ ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32)
@@ -435,14 +425,7 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[COPY5]](<4 x s32>), [[COPY6]](<4 x s32>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[BITCAST]](s384)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
@@ -460,14 +443,7 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r
; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
- ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>)
- ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>)
- ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[COPY5]](<4 x s32>), [[COPY6]](<4 x s32>)
- ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>)
- ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[BITCAST]](s384)
- ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96)
- ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>)
+ ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>)
; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32)
; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
index 3036df730a46b..d8a19824fde9a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
@@ -680,10 +680,9 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, addrspace 4)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](<3 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>)
%0:_(p4) = COPY $sgpr0_sgpr1
%1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 4)
S_ENDPGM 0, implicit %1
@@ -702,10 +701,9 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, align 8, addrspace 4)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](<3 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>)
%0:_(p4) = COPY $sgpr0_sgpr1
%1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 8)
S_ENDPGM 0, implicit %1
@@ -721,8 +719,9 @@ body: |
; CHECK-LABEL: name: load_constant_v3i32_align16
; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s32>), addrspace 4)
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](<3 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>)
%0:_(p4) = COPY $sgpr0_sgpr1
%1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 16)
S_ENDPGM 0, implicit %1
@@ -741,10 +740,10 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, addrspace 4)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[DEF]], [[LOAD]](<4 x s16>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[INSERT]], [[LOAD1]](<2 x s16>), 64
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](<6 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s16), [[UV1:%[0-9]+]]:sgpr(s16), [[UV2:%[0-9]+]]:sgpr(s16), [[UV3:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(s16), [[UV5:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD1]](<2 x s16>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s16>)
%0:_(p4) = COPY $sgpr0_sgpr1
%1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 4)
S_ENDPGM 0, implicit %1
@@ -763,10 +762,10 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, align 8, addrspace 4)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[DEF]], [[LOAD]](<4 x s16>), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[INSERT]], [[LOAD1]](<2 x s16>), 64
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](<6 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s16), [[UV1:%[0-9]+]]:sgpr(s16), [[UV2:%[0-9]+]]:sgpr(s16), [[UV3:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(s16), [[UV5:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD1]](<2 x s16>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s16>)
%0:_(p4) = COPY $sgpr0_sgpr1
%1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 8)
S_ENDPGM 0, implicit %1
@@ -782,8 +781,9 @@ body: |
; CHECK-LABEL: name: load_constant_v6i16_align16
; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<8 x s16>), addrspace 4)
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(<6 x s16>) = G_EXTRACT [[LOAD]](<8 x s16>), 0
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](<6 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s16), [[UV1:%[0-9]+]]:sgpr(s16), [[UV2:%[0-9]+]]:sgpr(s16), [[UV3:%[0-9]+]]:sgpr(s16), [[UV4:%[0-9]+]]:sgpr(s16), [[UV5:%[0-9]+]]:sgpr(s16), [[UV6:%[0-9]+]]:sgpr(s16), [[UV7:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD]](<8 x s16>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s16>)
%0:_(p4) = COPY $sgpr0_sgpr1
%1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 16)
S_ENDPGM 0, implicit %1
@@ -802,10 +802,9 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, addrspace 4)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(s96) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](s96)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s96)
%0:_(p4) = COPY $sgpr0_sgpr1
%1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 4)
S_ENDPGM 0, implicit %1
@@ -824,10 +823,9 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, align 8, addrspace 4)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(s96) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0
- ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](s96)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s96)
%0:_(p4) = COPY $sgpr0_sgpr1
%1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 8)
S_ENDPGM 0, implicit %1
@@ -843,8 +841,8 @@ body: |
; CHECK-LABEL: name: load_constant_i96_align16
; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(s128) = G_LOAD [[COPY]](p4) :: (invariant load (s128), addrspace 4)
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(s96) = G_EXTRACT [[LOAD]](s128), 0
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](s96)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[LOAD]](s128)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s96)
%0:_(p4) = COPY $sgpr0_sgpr1
%1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 16)
S_ENDPGM 0, implicit %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir
index ea5a841e32d95..bf150a54f434f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir
@@ -34,10 +34,9 @@ body: |
; SI-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 8, align 8, addrspace 4)
- ; SI-NEXT: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
- ; SI-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
- ; SI-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[INSERT1]](<3 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+ ; SI-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(p4) = COPY $sgpr0_sgpr1
%1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 8, addrspace 4, !range !0)
$sgpr0_sgpr1_sgpr2 = COPY %1
@@ -57,10 +56,9 @@ body: |
; SI-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 8, align 8, !tbaa !2, addrspace 4)
- ; SI-NEXT: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
- ; SI-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
- ; SI-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[INSERT1]](<3 x s32>)
+ ; SI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32)
+ ; SI-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(p4) = COPY $sgpr0_sgpr1
%1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 8, addrspace 4, !tbaa !1)
$sgpr0_sgpr1_sgpr2 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 740fc68c4b3d5..65c62c47a823d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -1051,10 +1051,11 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) {
; CHECK-LABEL: v_sdiv_i64_pow2k_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000
+; CHECK-NEXT: s_movk_i32 s4, 0x1000
+; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4
; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0
-; CHECK-NEXT: s_movk_i32 s4, 0xf000
-; CHECK-NEXT: s_movk_i32 s6, 0x1000
+; CHECK-NEXT: s_movk_i32 s5, 0xf000
+; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000
; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1
@@ -1066,10 +1067,10 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) {
; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
-; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4
+; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4
; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2
-; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2
-; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2
+; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2
+; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
@@ -1098,10 +1099,9 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) {
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc
; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2
-; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4
-; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2
-; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2
-; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000
+; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4
+; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2
+; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7
@@ -1129,55 +1129,56 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) {
; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc
; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2
; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4
-; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2
+; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2
; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT: v_mov_b32_e32 v7, 0x1000
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT: v_mul_lo_u32 v7, v1, v4
+; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2
+; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2
-; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4
-; CHECK-NEXT: v_mul_hi_u32 v8, s6, v2
-; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2
+; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4
+; CHECK-NEXT: v_mul_hi_u32 v9, s4, v2
+; CHECK-NEXT: v_mul_lo_u32 v8, s4, v2
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
-; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v9
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
; CHECK-NEXT: v_subb_u32_e64 v6, s[4:5], v1, v5, vcc
; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0
-; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0
+; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5]
-; CHECK-NEXT: v_mov_b32_e32 v7, s7
+; CHECK-NEXT: v_mov_b32_e32 v8, s6
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6
; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v2
-; CHECK-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5]
-; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v4, vcc
+; CHECK-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[4:5]
+; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc
; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000
-; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
-; CHECK-NEXT: v_mov_b32_e32 v8, s4
+; CHECK-NEXT: v_mov_b32_e32 v7, s4
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v6
-; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v7, vcc
+; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
@@ -1490,41 +1491,169 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-LABEL: v_sdiv_v2i64_pow2k_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x1000
-; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
-; CGP-NEXT: s_movk_i32 s6, 0xf000
-; CGP-NEXT: s_movk_i32 s7, 0x1000
-; CGP-NEXT: v_mov_b32_e32 v4, v5
-; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6
-; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v4
+; CGP-NEXT: s_movk_i32 s6, 0x1000
+; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6
+; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
+; CGP-NEXT: s_movk_i32 s7, 0xf000
+; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000
+; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
+; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4
; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7
-; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7
+; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5
+; CGP-NEXT: v_trunc_f32_e32 v6, v6
+; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6
+; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc
+; CGP-NEXT: v_mul_lo_u32 v7, -1, v5
+; CGP-NEXT: v_mul_lo_u32 v8, s7, v6
+; CGP-NEXT: v_mul_hi_u32 v10, s7, v5
+; CGP-NEXT: v_mul_lo_u32 v9, s7, v5
+; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_mul_lo_u32 v8, v6, v9
+; CGP-NEXT: v_mul_lo_u32 v10, v5, v7
+; CGP-NEXT: v_mul_hi_u32 v11, v5, v9
+; CGP-NEXT: v_mul_hi_u32 v9, v6, v9
+; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v11, v6, v7
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT: v_mul_hi_u32 v10, v5, v7
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8
+; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc
+; CGP-NEXT: v_mul_lo_u32 v7, -1, v5
+; CGP-NEXT: v_mul_lo_u32 v8, s7, v6
+; CGP-NEXT: v_mul_hi_u32 v10, s7, v5
+; CGP-NEXT: v_mul_lo_u32 v9, s7, v5
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_mul_lo_u32 v8, v6, v9
+; CGP-NEXT: v_mul_lo_u32 v10, v5, v7
+; CGP-NEXT: v_mul_hi_u32 v11, v5, v9
+; CGP-NEXT: v_mul_hi_u32 v9, v6, v9
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v11, v6, v7
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT: v_mul_hi_u32 v10, v5, v7
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8
+; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc
+; CGP-NEXT: v_mul_lo_u32 v7, v1, v8
+; CGP-NEXT: v_mul_lo_u32 v9, v0, v6
+; CGP-NEXT: v_mul_hi_u32 v10, v0, v8
+; CGP-NEXT: v_mul_hi_u32 v8, v1, v8
+; CGP-NEXT: v_mov_b32_e32 v5, 0x1000
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v10, v1, v6
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7
+; CGP-NEXT: v_mul_hi_u32 v9, v0, v6
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT: v_mul_hi_u32 v6, v1, v6
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT: v_mul_lo_u32 v8, 0, v7
+; CGP-NEXT: v_mul_lo_u32 v9, s6, v6
+; CGP-NEXT: v_mul_hi_u32 v11, s6, v7
+; CGP-NEXT: v_mul_lo_u32 v10, s6, v7
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10
+; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v8, vcc
+; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v8
+; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; CGP-NEXT: v_mov_b32_e32 v10, s8
+; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
+; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v7
+; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5]
+; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc
+; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
+; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
+; CGP-NEXT: v_mov_b32_e32 v11, s4
+; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CGP-NEXT: v_cndmask_b32_e32 v0, v11, v0, vcc
+; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v9
+; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v10, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v1, vcc
+; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5
+; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v11, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
+; CGP-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
+; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
+; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6
+; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9
+; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7
+; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
+; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6
; CGP-NEXT: v_trunc_f32_e32 v8, v8
-; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8
-; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8
+; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8
-; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; CGP-NEXT: v_mul_lo_u32 v9, -1, v7
-; CGP-NEXT: v_mul_lo_u32 v10, s6, v8
-; CGP-NEXT: v_mul_hi_u32 v12, s6, v7
-; CGP-NEXT: v_mul_lo_u32 v11, s6, v7
+; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; CGP-NEXT: v_mul_lo_u32 v9, -1, v6
+; CGP-NEXT: v_mul_lo_u32 v10, s7, v8
+; CGP-NEXT: v_mul_hi_u32 v12, s7, v6
+; CGP-NEXT: v_mul_lo_u32 v11, s7, v6
; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
; CGP-NEXT: v_mul_lo_u32 v10, v8, v11
-; CGP-NEXT: v_mul_lo_u32 v12, v7, v9
-; CGP-NEXT: v_mul_hi_u32 v13, v7, v11
+; CGP-NEXT: v_mul_lo_u32 v12, v6, v9
+; CGP-NEXT: v_mul_hi_u32 v13, v6, v11
; CGP-NEXT: v_mul_hi_u32 v11, v8, v11
-; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT: v_xor_b32_e32 v2, v2, v7
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CGP-NEXT: v_mul_lo_u32 v13, v8, v9
; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT: v_mul_hi_u32 v12, v7, v9
+; CGP-NEXT: v_mul_hi_u32 v12, v6, v9
; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
@@ -1535,27 +1664,27 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc
-; CGP-NEXT: v_mul_lo_u32 v9, -1, v7
-; CGP-NEXT: v_mul_lo_u32 v10, s6, v8
-; CGP-NEXT: v_mul_hi_u32 v12, s6, v7
-; CGP-NEXT: v_mul_lo_u32 v11, s6, v7
-; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000
+; CGP-NEXT: v_mul_lo_u32 v9, -1, v6
+; CGP-NEXT: v_mul_lo_u32 v10, s7, v8
+; CGP-NEXT: v_mul_hi_u32 v12, s7, v6
+; CGP-NEXT: v_mul_lo_u32 v11, s7, v6
+; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
; CGP-NEXT: v_mul_lo_u32 v10, v8, v11
-; CGP-NEXT: v_mul_lo_u32 v12, v7, v9
-; CGP-NEXT: v_mul_hi_u32 v13, v7, v11
+; CGP-NEXT: v_mul_lo_u32 v12, v6, v9
+; CGP-NEXT: v_mul_hi_u32 v13, v6, v11
; CGP-NEXT: v_mul_hi_u32 v11, v8, v11
-; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6
+; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CGP-NEXT: v_mul_lo_u32 v13, v8, v9
; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT: v_mul_hi_u32 v12, v7, v9
+; CGP-NEXT: v_mul_hi_u32 v12, v6, v9
; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
@@ -1566,195 +1695,69 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc
-; CGP-NEXT: v_mul_lo_u32 v9, v1, v7
-; CGP-NEXT: v_mul_lo_u32 v10, v0, v8
-; CGP-NEXT: v_mul_hi_u32 v11, v0, v7
-; CGP-NEXT: v_mul_hi_u32 v7, v1, v7
-; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5
+; CGP-NEXT: v_mul_lo_u32 v9, v3, v6
+; CGP-NEXT: v_mul_lo_u32 v10, v2, v8
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
+; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; CGP-NEXT: v_mul_hi_u32 v4, v2, v6
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4
+; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v9, v3, v8
+; CGP-NEXT: v_mul_hi_u32 v6, v3, v6
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
+; CGP-NEXT: v_mul_hi_u32 v10, v2, v8
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6
; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v11, v1, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT: v_mul_hi_u32 v10, v0, v8
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT: v_mul_hi_u32 v8, v1, v8
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT: v_mul_lo_u32 v9, 0, v7
-; CGP-NEXT: v_mul_lo_u32 v10, s7, v8
-; CGP-NEXT: v_mul_hi_u32 v12, s7, v7
-; CGP-NEXT: v_mul_lo_u32 v11, s7, v7
-; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v11
-; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v9, vcc
-; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v9
-; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0
-; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0
-; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; CGP-NEXT: v_mov_b32_e32 v11, s8
-; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10
-; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v7
-; CGP-NEXT: v_cndmask_b32_e64 v9, v11, v9, s[4:5]
-; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v8, vcc
-; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v0
-; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
-; CGP-NEXT: v_mov_b32_e32 v12, s4
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CGP-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v10
-; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; CGP-NEXT: v_cndmask_b32_e32 v0, v10, v1, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v1, v11, v12, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
-; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
-; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5
-; CGP-NEXT: v_trunc_f32_e32 v7, v7
-; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7
-; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
-; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3
-; CGP-NEXT: v_mul_lo_u32 v8, -1, v5
-; CGP-NEXT: v_mul_lo_u32 v9, s6, v7
-; CGP-NEXT: v_mul_hi_u32 v11, s6, v5
-; CGP-NEXT: v_mul_lo_u32 v10, s6, v5
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
-; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; CGP-NEXT: v_mul_lo_u32 v9, v7, v10
-; CGP-NEXT: v_mul_lo_u32 v11, v5, v8
-; CGP-NEXT: v_mul_hi_u32 v12, v5, v10
-; CGP-NEXT: v_mul_hi_u32 v10, v7, v10
-; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v12, v7, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
-; CGP-NEXT: v_mul_hi_u32 v11, v5, v8
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT: v_mul_hi_u32 v8, v7, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
-; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc
-; CGP-NEXT: v_mul_lo_u32 v8, -1, v5
-; CGP-NEXT: v_mul_lo_u32 v9, s6, v7
-; CGP-NEXT: v_mul_hi_u32 v11, s6, v5
-; CGP-NEXT: v_mul_lo_u32 v10, s6, v5
-; CGP-NEXT: v_xor_b32_e32 v2, v2, v6
+; CGP-NEXT: v_mul_hi_u32 v8, v3, v8
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4
+; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6
+; CGP-NEXT: v_mul_lo_u32 v8, 0, v4
+; CGP-NEXT: v_mul_lo_u32 v9, s6, v6
+; CGP-NEXT: v_mul_hi_u32 v11, s6, v4
+; CGP-NEXT: v_mul_lo_u32 v10, s6, v4
+; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000
; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; CGP-NEXT: v_mul_lo_u32 v9, v7, v10
-; CGP-NEXT: v_mul_lo_u32 v11, v5, v8
-; CGP-NEXT: v_mul_hi_u32 v12, v5, v10
-; CGP-NEXT: v_mul_hi_u32 v10, v7, v10
-; CGP-NEXT: v_xor_b32_e32 v3, v3, v6
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v12, v7, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
-; CGP-NEXT: v_mul_hi_u32 v11, v5, v8
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT: v_mul_hi_u32 v8, v7, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
-; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc
-; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
-; CGP-NEXT: v_mul_lo_u32 v8, v3, v5
-; CGP-NEXT: v_mul_lo_u32 v9, v2, v7
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc
-; CGP-NEXT: v_mul_hi_u32 v4, v2, v5
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4
-; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v8, v3, v7
-; CGP-NEXT: v_mul_hi_u32 v5, v3, v5
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4
-; CGP-NEXT: v_mul_hi_u32 v9, v2, v7
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT: v_mul_hi_u32 v7, v3, v7
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4
-; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; CGP-NEXT: v_mul_lo_u32 v7, 0, v4
-; CGP-NEXT: v_mul_lo_u32 v8, s7, v5
-; CGP-NEXT: v_mul_hi_u32 v10, s7, v4
-; CGP-NEXT: v_mul_lo_u32 v9, s7, v4
-; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9
-; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v7, vcc
-; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v7
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10
+; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v3, v8, vcc
+; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v8
; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2
-; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s7, v2
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
-; CGP-NEXT: v_mov_b32_e32 v9, s6
-; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8
-; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4
-; CGP-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[4:5]
-; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v5, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; CGP-NEXT: v_mov_b32_e32 v10, s6
+; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
+; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v4
+; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5]
+; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc
; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v2
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
-; CGP-NEXT: v_mov_b32_e32 v10, s4
+; CGP-NEXT: v_mov_b32_e32 v5, s4
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; CGP-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v8
-; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v9, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
+; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v9
+; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v10, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
-; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v3, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
+; CGP-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v5, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; CGP-NEXT: v_xor_b32_e32 v2, v2, v6
-; CGP-NEXT: v_xor_b32_e32 v3, v3, v6
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v6
-; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
+; CGP-NEXT: v_xor_b32_e32 v2, v2, v7
+; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7
+; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = sdiv <2 x i64> %num, <i64 4096, i64 4096>
ret <2 x i64> %result
@@ -1764,10 +1767,11 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
; CHECK-LABEL: v_sdiv_i64_oddk_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb
+; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb
+; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4
; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0
-; CHECK-NEXT: s_mov_b32 s4, 0xffed2705
-; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb
+; CHECK-NEXT: s_mov_b32 s5, 0xffed2705
+; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000
; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1
@@ -1779,10 +1783,10 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
-; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4
+; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4
; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2
-; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2
-; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2
+; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2
+; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
@@ -1811,10 +1815,9 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc
; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2
-; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4
-; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2
-; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2
-; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000
+; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4
+; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2
+; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7
@@ -1842,55 +1845,56 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc
; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2
; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4
-; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2
+; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2
; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT: v_mov_b32_e32 v7, 0x12d8fb
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT: v_mul_lo_u32 v7, v1, v4
+; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2
+; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2
-; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4
-; CHECK-NEXT: v_mul_hi_u32 v8, s6, v2
-; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2
+; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4
+; CHECK-NEXT: v_mul_hi_u32 v9, s4, v2
+; CHECK-NEXT: v_mul_lo_u32 v8, s4, v2
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
-; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v9
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
; CHECK-NEXT: v_subb_u32_e64 v6, s[4:5], v1, v5, vcc
; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0
-; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0
+; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5]
-; CHECK-NEXT: v_mov_b32_e32 v7, s7
+; CHECK-NEXT: v_mov_b32_e32 v8, s6
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6
; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v2
-; CHECK-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5]
-; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v4, vcc
+; CHECK-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[4:5]
+; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc
; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000
-; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
-; CHECK-NEXT: v_mov_b32_e32 v8, s4
+; CHECK-NEXT: v_mov_b32_e32 v7, s4
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v6
-; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v7, vcc
+; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
@@ -2203,41 +2207,169 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-LABEL: v_sdiv_v2i64_oddk_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x12d8fb
-; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
-; CGP-NEXT: s_mov_b32 s6, 0xffed2705
-; CGP-NEXT: s_mov_b32 s7, 0x12d8fb
-; CGP-NEXT: v_mov_b32_e32 v4, v5
-; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6
-; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v4
+; CGP-NEXT: s_mov_b32 s6, 0x12d8fb
+; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6
+; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
+; CGP-NEXT: s_mov_b32 s7, 0xffed2705
+; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000
+; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
+; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4
; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7
-; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7
+; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5
+; CGP-NEXT: v_trunc_f32_e32 v6, v6
+; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6
+; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc
+; CGP-NEXT: v_mul_lo_u32 v7, -1, v5
+; CGP-NEXT: v_mul_lo_u32 v8, s7, v6
+; CGP-NEXT: v_mul_hi_u32 v10, s7, v5
+; CGP-NEXT: v_mul_lo_u32 v9, s7, v5
+; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_mul_lo_u32 v8, v6, v9
+; CGP-NEXT: v_mul_lo_u32 v10, v5, v7
+; CGP-NEXT: v_mul_hi_u32 v11, v5, v9
+; CGP-NEXT: v_mul_hi_u32 v9, v6, v9
+; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v11, v6, v7
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT: v_mul_hi_u32 v10, v5, v7
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8
+; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc
+; CGP-NEXT: v_mul_lo_u32 v7, -1, v5
+; CGP-NEXT: v_mul_lo_u32 v8, s7, v6
+; CGP-NEXT: v_mul_hi_u32 v10, s7, v5
+; CGP-NEXT: v_mul_lo_u32 v9, s7, v5
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_mul_lo_u32 v8, v6, v9
+; CGP-NEXT: v_mul_lo_u32 v10, v5, v7
+; CGP-NEXT: v_mul_hi_u32 v11, v5, v9
+; CGP-NEXT: v_mul_hi_u32 v9, v6, v9
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v11, v6, v7
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT: v_mul_hi_u32 v10, v5, v7
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8
+; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc
+; CGP-NEXT: v_mul_lo_u32 v7, v1, v8
+; CGP-NEXT: v_mul_lo_u32 v9, v0, v6
+; CGP-NEXT: v_mul_hi_u32 v10, v0, v8
+; CGP-NEXT: v_mul_hi_u32 v8, v1, v8
+; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v10, v1, v6
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7
+; CGP-NEXT: v_mul_hi_u32 v9, v0, v6
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT: v_mul_hi_u32 v6, v1, v6
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT: v_mul_lo_u32 v8, 0, v7
+; CGP-NEXT: v_mul_lo_u32 v9, s6, v6
+; CGP-NEXT: v_mul_hi_u32 v11, s6, v7
+; CGP-NEXT: v_mul_lo_u32 v10, s6, v7
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10
+; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v8, vcc
+; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v8
+; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
+; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; CGP-NEXT: v_mov_b32_e32 v10, s8
+; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
+; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v7
+; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5]
+; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc
+; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
+; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
+; CGP-NEXT: v_mov_b32_e32 v11, s4
+; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CGP-NEXT: v_cndmask_b32_e32 v0, v11, v0, vcc
+; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v9
+; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v10, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v1, vcc
+; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5
+; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v11, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
+; CGP-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
+; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
+; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6
+; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9
+; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7
+; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
+; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6
; CGP-NEXT: v_trunc_f32_e32 v8, v8
-; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8
-; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8
+; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8
-; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; CGP-NEXT: v_mul_lo_u32 v9, -1, v7
-; CGP-NEXT: v_mul_lo_u32 v10, s6, v8
-; CGP-NEXT: v_mul_hi_u32 v12, s6, v7
-; CGP-NEXT: v_mul_lo_u32 v11, s6, v7
+; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; CGP-NEXT: v_mul_lo_u32 v9, -1, v6
+; CGP-NEXT: v_mul_lo_u32 v10, s7, v8
+; CGP-NEXT: v_mul_hi_u32 v12, s7, v6
+; CGP-NEXT: v_mul_lo_u32 v11, s7, v6
; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
; CGP-NEXT: v_mul_lo_u32 v10, v8, v11
-; CGP-NEXT: v_mul_lo_u32 v12, v7, v9
-; CGP-NEXT: v_mul_hi_u32 v13, v7, v11
+; CGP-NEXT: v_mul_lo_u32 v12, v6, v9
+; CGP-NEXT: v_mul_hi_u32 v13, v6, v11
; CGP-NEXT: v_mul_hi_u32 v11, v8, v11
-; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT: v_xor_b32_e32 v2, v2, v7
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CGP-NEXT: v_mul_lo_u32 v13, v8, v9
; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT: v_mul_hi_u32 v12, v7, v9
+; CGP-NEXT: v_mul_hi_u32 v12, v6, v9
; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
@@ -2248,27 +2380,27 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc
-; CGP-NEXT: v_mul_lo_u32 v9, -1, v7
-; CGP-NEXT: v_mul_lo_u32 v10, s6, v8
-; CGP-NEXT: v_mul_hi_u32 v12, s6, v7
-; CGP-NEXT: v_mul_lo_u32 v11, s6, v7
-; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000
+; CGP-NEXT: v_mul_lo_u32 v9, -1, v6
+; CGP-NEXT: v_mul_lo_u32 v10, s7, v8
+; CGP-NEXT: v_mul_hi_u32 v12, s7, v6
+; CGP-NEXT: v_mul_lo_u32 v11, s7, v6
+; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
; CGP-NEXT: v_mul_lo_u32 v10, v8, v11
-; CGP-NEXT: v_mul_lo_u32 v12, v7, v9
-; CGP-NEXT: v_mul_hi_u32 v13, v7, v11
+; CGP-NEXT: v_mul_lo_u32 v12, v6, v9
+; CGP-NEXT: v_mul_hi_u32 v13, v6, v11
; CGP-NEXT: v_mul_hi_u32 v11, v8, v11
-; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6
+; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CGP-NEXT: v_mul_lo_u32 v13, v8, v9
; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT: v_mul_hi_u32 v12, v7, v9
+; CGP-NEXT: v_mul_hi_u32 v12, v6, v9
; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
@@ -2279,195 +2411,69 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc
-; CGP-NEXT: v_mul_lo_u32 v9, v1, v7
-; CGP-NEXT: v_mul_lo_u32 v10, v0, v8
-; CGP-NEXT: v_mul_hi_u32 v11, v0, v7
-; CGP-NEXT: v_mul_hi_u32 v7, v1, v7
-; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5
+; CGP-NEXT: v_mul_lo_u32 v9, v3, v6
+; CGP-NEXT: v_mul_lo_u32 v10, v2, v8
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
+; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; CGP-NEXT: v_mul_hi_u32 v4, v2, v6
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4
+; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v9, v3, v8
+; CGP-NEXT: v_mul_hi_u32 v6, v3, v6
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
+; CGP-NEXT: v_mul_hi_u32 v10, v2, v8
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6
; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v11, v1, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT: v_mul_hi_u32 v10, v0, v8
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT: v_mul_hi_u32 v8, v1, v8
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT: v_mul_lo_u32 v9, 0, v7
-; CGP-NEXT: v_mul_lo_u32 v10, s7, v8
-; CGP-NEXT: v_mul_hi_u32 v12, s7, v7
-; CGP-NEXT: v_mul_lo_u32 v11, s7, v7
-; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v11
-; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v9, vcc
-; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v9
-; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0
-; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0
-; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
-; CGP-NEXT: v_mov_b32_e32 v11, s8
-; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10
-; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v7
-; CGP-NEXT: v_cndmask_b32_e64 v9, v11, v9, s[4:5]
-; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v8, vcc
-; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v0
-; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
-; CGP-NEXT: v_mov_b32_e32 v12, s4
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CGP-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v10
-; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; CGP-NEXT: v_cndmask_b32_e32 v0, v10, v1, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v1, v11, v12, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
-; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
-; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5
-; CGP-NEXT: v_trunc_f32_e32 v7, v7
-; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7
-; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
-; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3
-; CGP-NEXT: v_mul_lo_u32 v8, -1, v5
-; CGP-NEXT: v_mul_lo_u32 v9, s6, v7
-; CGP-NEXT: v_mul_hi_u32 v11, s6, v5
-; CGP-NEXT: v_mul_lo_u32 v10, s6, v5
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
-; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; CGP-NEXT: v_mul_lo_u32 v9, v7, v10
-; CGP-NEXT: v_mul_lo_u32 v11, v5, v8
-; CGP-NEXT: v_mul_hi_u32 v12, v5, v10
-; CGP-NEXT: v_mul_hi_u32 v10, v7, v10
-; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v12, v7, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
-; CGP-NEXT: v_mul_hi_u32 v11, v5, v8
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT: v_mul_hi_u32 v8, v7, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
-; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc
-; CGP-NEXT: v_mul_lo_u32 v8, -1, v5
-; CGP-NEXT: v_mul_lo_u32 v9, s6, v7
-; CGP-NEXT: v_mul_hi_u32 v11, s6, v5
-; CGP-NEXT: v_mul_lo_u32 v10, s6, v5
-; CGP-NEXT: v_xor_b32_e32 v2, v2, v6
+; CGP-NEXT: v_mul_hi_u32 v8, v3, v8
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4
+; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6
+; CGP-NEXT: v_mul_lo_u32 v8, 0, v4
+; CGP-NEXT: v_mul_lo_u32 v9, s6, v6
+; CGP-NEXT: v_mul_hi_u32 v11, s6, v4
+; CGP-NEXT: v_mul_lo_u32 v10, s6, v4
+; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000
; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; CGP-NEXT: v_mul_lo_u32 v9, v7, v10
-; CGP-NEXT: v_mul_lo_u32 v11, v5, v8
-; CGP-NEXT: v_mul_hi_u32 v12, v5, v10
-; CGP-NEXT: v_mul_hi_u32 v10, v7, v10
-; CGP-NEXT: v_xor_b32_e32 v3, v3, v6
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v12, v7, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
-; CGP-NEXT: v_mul_hi_u32 v11, v5, v8
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT: v_mul_hi_u32 v8, v7, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
-; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc
-; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
-; CGP-NEXT: v_mul_lo_u32 v8, v3, v5
-; CGP-NEXT: v_mul_lo_u32 v9, v2, v7
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc
-; CGP-NEXT: v_mul_hi_u32 v4, v2, v5
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4
-; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v8, v3, v7
-; CGP-NEXT: v_mul_hi_u32 v5, v3, v5
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4
-; CGP-NEXT: v_mul_hi_u32 v9, v2, v7
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT: v_mul_hi_u32 v7, v3, v7
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4
-; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; CGP-NEXT: v_mul_lo_u32 v7, 0, v4
-; CGP-NEXT: v_mul_lo_u32 v8, s7, v5
-; CGP-NEXT: v_mul_hi_u32 v10, s7, v4
-; CGP-NEXT: v_mul_lo_u32 v9, s7, v4
-; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9
-; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v7, vcc
-; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v7
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10
+; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v3, v8, vcc
+; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v8
; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2
-; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s7, v2
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
-; CGP-NEXT: v_mov_b32_e32 v9, s6
-; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8
-; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4
-; CGP-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[4:5]
-; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v5, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
+; CGP-NEXT: v_mov_b32_e32 v10, s6
+; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
+; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v4
+; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5]
+; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc
; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v2
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
-; CGP-NEXT: v_mov_b32_e32 v10, s4
+; CGP-NEXT: v_mov_b32_e32 v5, s4
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; CGP-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v8
-; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v9, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
+; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v9
+; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v10, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
-; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v3, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
+; CGP-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v5, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; CGP-NEXT: v_xor_b32_e32 v2, v2, v6
-; CGP-NEXT: v_xor_b32_e32 v3, v3, v6
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v6
-; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
+; CGP-NEXT: v_xor_b32_e32 v2, v2, v7
+; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7
+; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = sdiv <2 x i64> %num, <i64 1235195, i64 1235195>
ret <2 x i64> %result
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index 5323967b4fc46..e75db654647b4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -1031,10 +1031,11 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) {
; CHECK-LABEL: v_srem_i64_pow2k_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000
+; CHECK-NEXT: s_movk_i32 s4, 0x1000
+; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4
; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0
-; CHECK-NEXT: s_movk_i32 s4, 0xf000
-; CHECK-NEXT: s_movk_i32 s6, 0x1000
+; CHECK-NEXT: s_movk_i32 s5, 0xf000
+; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000
; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1
@@ -1046,10 +1047,10 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) {
; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
-; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4
+; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4
; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2
-; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2
-; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2
+; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2
+; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
@@ -1078,10 +1079,9 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) {
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc
; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2
-; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4
-; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2
-; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2
-; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000
+; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4
+; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2
+; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7
@@ -1109,49 +1109,50 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) {
; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc
; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2
; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4
-; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2
+; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2
; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT: v_mov_b32_e32 v7, 0x1000
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT: v_mul_lo_u32 v7, v1, v4
+; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2
+; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2
-; CHECK-NEXT: v_mul_lo_u32 v4, s6, v4
-; CHECK-NEXT: v_mul_lo_u32 v6, s6, v2
-; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2
+; CHECK-NEXT: v_mul_lo_u32 v4, s4, v4
+; CHECK-NEXT: v_mul_lo_u32 v6, s4, v2
+; CHECK-NEXT: v_mul_hi_u32 v2, s4, v2
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v1, v2, vcc
; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2
-; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0
+; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5]
-; CHECK-NEXT: v_mov_b32_e32 v5, s7
+; CHECK-NEXT: v_mov_b32_e32 v5, s6
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; CHECK-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[4:5]
-; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, s6, v0
+; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v7
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000
-; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v5
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v7
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
-; CHECK-NEXT: v_mov_b32_e32 v7, s4
+; CHECK-NEXT: v_mov_b32_e32 v8, s4
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CHECK-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s6, v5
+; CHECK-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
+; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v5, v7
; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
@@ -1466,41 +1467,166 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-LABEL: v_srem_v2i64_pow2k_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x1000
-; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
-; CGP-NEXT: s_movk_i32 s6, 0xf000
-; CGP-NEXT: s_movk_i32 s7, 0x1000
-; CGP-NEXT: v_mov_b32_e32 v4, v5
-; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6
-; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v4
+; CGP-NEXT: s_movk_i32 s6, 0x1000
+; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6
+; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
+; CGP-NEXT: s_movk_i32 s7, 0xf000
+; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000
+; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
+; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4
; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7
-; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7
-; CGP-NEXT: v_trunc_f32_e32 v8, v8
-; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8
-; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8
+; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5
+; CGP-NEXT: v_trunc_f32_e32 v6, v6
+; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6
+; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; CGP-NEXT: v_mul_lo_u32 v9, -1, v7
-; CGP-NEXT: v_mul_lo_u32 v10, s6, v8
-; CGP-NEXT: v_mul_hi_u32 v12, s6, v7
-; CGP-NEXT: v_mul_lo_u32 v11, s6, v7
+; CGP-NEXT: v_mul_lo_u32 v7, -1, v5
+; CGP-NEXT: v_mul_lo_u32 v8, s7, v6
+; CGP-NEXT: v_mul_hi_u32 v10, s7, v5
+; CGP-NEXT: v_mul_lo_u32 v9, s7, v5
; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_mul_lo_u32 v8, v6, v9
+; CGP-NEXT: v_mul_lo_u32 v10, v5, v7
+; CGP-NEXT: v_mul_hi_u32 v11, v5, v9
+; CGP-NEXT: v_mul_hi_u32 v9, v6, v9
+; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v11, v6, v7
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT: v_mul_hi_u32 v10, v5, v7
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8
+; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc
+; CGP-NEXT: v_mul_lo_u32 v7, -1, v5
+; CGP-NEXT: v_mul_lo_u32 v8, s7, v6
+; CGP-NEXT: v_mul_hi_u32 v10, s7, v5
+; CGP-NEXT: v_mul_lo_u32 v9, s7, v5
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_mul_lo_u32 v8, v6, v9
+; CGP-NEXT: v_mul_lo_u32 v10, v5, v7
+; CGP-NEXT: v_mul_hi_u32 v11, v5, v9
+; CGP-NEXT: v_mul_hi_u32 v9, v6, v9
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v11, v6, v7
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT: v_mul_hi_u32 v10, v5, v7
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8
+; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc
+; CGP-NEXT: v_mul_lo_u32 v7, v1, v8
+; CGP-NEXT: v_mul_lo_u32 v9, v0, v6
+; CGP-NEXT: v_mul_hi_u32 v10, v0, v8
+; CGP-NEXT: v_mul_hi_u32 v8, v1, v8
+; CGP-NEXT: v_mov_b32_e32 v5, 0x1000
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v10, v1, v6
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7
+; CGP-NEXT: v_mul_hi_u32 v9, v0, v6
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT: v_mul_hi_u32 v6, v1, v6
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT: v_mul_lo_u32 v8, 0, v7
+; CGP-NEXT: v_mul_lo_u32 v6, s6, v6
+; CGP-NEXT: v_mul_lo_u32 v9, s6, v7
+; CGP-NEXT: v_mul_hi_u32 v7, s6, v7
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v9
+; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v6, vcc
+; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
+; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; CGP-NEXT: v_mov_b32_e32 v8, s8
+; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7
+; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[4:5]
+; CGP-NEXT: v_sub_i32_e32 v8, vcc, v0, v5
+; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
+; CGP-NEXT: v_mov_b32_e32 v10, s4
+; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
+; CGP-NEXT: v_sub_i32_e32 v10, vcc, v8, v5
+; CGP-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v1, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
+; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5
+; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
+; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
+; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6
+; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3
+; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
+; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6
+; CGP-NEXT: v_trunc_f32_e32 v8, v8
+; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8
+; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7
+; CGP-NEXT: v_mul_lo_u32 v9, -1, v6
+; CGP-NEXT: v_mul_lo_u32 v10, s7, v8
+; CGP-NEXT: v_mul_hi_u32 v12, s7, v6
+; CGP-NEXT: v_mul_lo_u32 v11, s7, v6
+; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
; CGP-NEXT: v_mul_lo_u32 v10, v8, v11
-; CGP-NEXT: v_mul_lo_u32 v12, v7, v9
-; CGP-NEXT: v_mul_hi_u32 v13, v7, v11
+; CGP-NEXT: v_mul_lo_u32 v12, v6, v9
+; CGP-NEXT: v_mul_hi_u32 v13, v6, v11
; CGP-NEXT: v_mul_hi_u32 v11, v8, v11
-; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CGP-NEXT: v_mul_lo_u32 v13, v8, v9
; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT: v_mul_hi_u32 v12, v7, v9
+; CGP-NEXT: v_mul_hi_u32 v12, v6, v9
; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
@@ -1511,27 +1637,27 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc
-; CGP-NEXT: v_mul_lo_u32 v9, -1, v7
-; CGP-NEXT: v_mul_lo_u32 v10, s6, v8
-; CGP-NEXT: v_mul_hi_u32 v12, s6, v7
-; CGP-NEXT: v_mul_lo_u32 v11, s6, v7
-; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000
+; CGP-NEXT: v_mul_lo_u32 v9, -1, v6
+; CGP-NEXT: v_mul_lo_u32 v10, s7, v8
+; CGP-NEXT: v_mul_hi_u32 v12, s7, v6
+; CGP-NEXT: v_mul_lo_u32 v11, s7, v6
+; CGP-NEXT: v_xor_b32_e32 v2, v2, v7
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
; CGP-NEXT: v_mul_lo_u32 v10, v8, v11
-; CGP-NEXT: v_mul_lo_u32 v12, v7, v9
-; CGP-NEXT: v_mul_hi_u32 v13, v7, v11
+; CGP-NEXT: v_mul_lo_u32 v12, v6, v9
+; CGP-NEXT: v_mul_hi_u32 v13, v6, v11
; CGP-NEXT: v_mul_hi_u32 v11, v8, v11
-; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6
+; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CGP-NEXT: v_mul_lo_u32 v13, v8, v9
; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT: v_mul_hi_u32 v12, v7, v9
+; CGP-NEXT: v_mul_hi_u32 v12, v6, v9
; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
@@ -1542,191 +1668,68 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc
-; CGP-NEXT: v_mul_lo_u32 v9, v1, v7
-; CGP-NEXT: v_mul_lo_u32 v10, v0, v8
-; CGP-NEXT: v_mul_hi_u32 v11, v0, v7
-; CGP-NEXT: v_mul_hi_u32 v7, v1, v7
-; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v11, v1, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT: v_mul_hi_u32 v10, v0, v8
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT: v_mul_hi_u32 v8, v1, v8
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT: v_mul_lo_u32 v9, 0, v7
-; CGP-NEXT: v_mul_lo_u32 v8, s7, v8
-; CGP-NEXT: v_mul_lo_u32 v10, s7, v7
-; CGP-NEXT: v_mul_hi_u32 v7, s7, v7
-; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10
-; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v1, v7, vcc
-; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v7
-; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0
-; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
-; CGP-NEXT: v_mov_b32_e32 v9, s8
-; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8
-; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[4:5]
-; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v0
-; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v9
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc
-; CGP-NEXT: v_mov_b32_e32 v11, s4
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CGP-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc
-; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s7, v9
-; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v1, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
-; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
-; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5
-; CGP-NEXT: v_trunc_f32_e32 v7, v7
-; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7
-; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
-; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; CGP-NEXT: v_mul_lo_u32 v8, -1, v5
-; CGP-NEXT: v_mul_lo_u32 v9, s6, v7
-; CGP-NEXT: v_mul_hi_u32 v11, s6, v5
-; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3
-; CGP-NEXT: v_mul_lo_u32 v10, s6, v5
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
-; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; CGP-NEXT: v_mul_lo_u32 v9, v7, v10
-; CGP-NEXT: v_mul_lo_u32 v11, v5, v8
-; CGP-NEXT: v_mul_hi_u32 v12, v5, v10
-; CGP-NEXT: v_mul_hi_u32 v10, v7, v10
-; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v12, v7, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
-; CGP-NEXT: v_mul_hi_u32 v11, v5, v8
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT: v_mul_hi_u32 v8, v7, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
-; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc
-; CGP-NEXT: v_mul_lo_u32 v8, -1, v5
-; CGP-NEXT: v_mul_lo_u32 v9, s6, v7
-; CGP-NEXT: v_mul_hi_u32 v11, s6, v5
-; CGP-NEXT: v_mul_lo_u32 v10, s6, v5
-; CGP-NEXT: v_xor_b32_e32 v2, v2, v6
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; CGP-NEXT: v_mul_lo_u32 v9, v7, v10
-; CGP-NEXT: v_mul_lo_u32 v11, v5, v8
-; CGP-NEXT: v_mul_hi_u32 v12, v5, v10
-; CGP-NEXT: v_mul_hi_u32 v10, v7, v10
-; CGP-NEXT: v_xor_b32_e32 v3, v3, v6
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v12, v7, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
-; CGP-NEXT: v_mul_hi_u32 v11, v5, v8
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT: v_mul_hi_u32 v8, v7, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
-; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc
; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
-; CGP-NEXT: v_mul_lo_u32 v8, v3, v5
-; CGP-NEXT: v_mul_lo_u32 v9, v2, v7
+; CGP-NEXT: v_mul_lo_u32 v9, v3, v6
+; CGP-NEXT: v_mul_lo_u32 v10, v2, v8
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc
-; CGP-NEXT: v_mul_hi_u32 v4, v2, v5
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4
-; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v8, v3, v7
-; CGP-NEXT: v_mul_hi_u32 v5, v3, v5
+; CGP-NEXT: v_mul_hi_u32 v4, v2, v6
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4
-; CGP-NEXT: v_mul_hi_u32 v9, v2, v7
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
+; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v9, v3, v8
+; CGP-NEXT: v_mul_hi_u32 v6, v3, v6
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
+; CGP-NEXT: v_mul_hi_u32 v10, v2, v8
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6
; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT: v_mul_hi_u32 v7, v3, v7
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4
-; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; CGP-NEXT: v_mul_lo_u32 v7, 0, v4
-; CGP-NEXT: v_mul_lo_u32 v5, s7, v5
-; CGP-NEXT: v_mul_lo_u32 v8, s7, v4
-; CGP-NEXT: v_mul_hi_u32 v4, s7, v4
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT: v_mul_hi_u32 v8, v3, v8
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4
+; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6
+; CGP-NEXT: v_mul_lo_u32 v8, 0, v4
+; CGP-NEXT: v_mul_lo_u32 v6, s6, v6
+; CGP-NEXT: v_mul_lo_u32 v9, s6, v4
+; CGP-NEXT: v_mul_hi_u32 v4, s6, v4
; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8
-; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v3, v4, vcc
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9
+; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4
-; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5]
-; CGP-NEXT: v_mov_b32_e32 v7, s6
-; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
+; CGP-NEXT: v_mov_b32_e32 v8, s6
+; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6
; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v4, v7, v4, s[4:5]
-; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s7, v2
+; CGP-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5]
+; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v5
; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v7
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
-; CGP-NEXT: v_mov_b32_e32 v9, s4
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
+; CGP-NEXT: v_mov_b32_e32 v10, s4
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; CGP-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v7
+; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
+; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5
; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
-; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
+; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
-; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; CGP-NEXT: v_xor_b32_e32 v2, v2, v6
-; CGP-NEXT: v_xor_b32_e32 v3, v3, v6
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v6
-; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
+; CGP-NEXT: v_xor_b32_e32 v2, v2, v7
+; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7
+; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = srem <2 x i64> %num, <i64 4096, i64 4096>
ret <2 x i64> %result
@@ -1736,10 +1739,11 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) {
; CHECK-LABEL: v_srem_i64_oddk_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb
+; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb
+; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4
; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0
-; CHECK-NEXT: s_mov_b32 s4, 0xffed2705
-; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb
+; CHECK-NEXT: s_mov_b32 s5, 0xffed2705
+; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000
; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1
@@ -1751,10 +1755,10 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) {
; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
-; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4
+; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4
; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2
-; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2
-; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2
+; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2
+; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
@@ -1783,10 +1787,9 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) {
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc
; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2
-; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4
-; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2
-; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2
-; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000
+; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4
+; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2
+; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7
@@ -1814,49 +1817,50 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) {
; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc
; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2
; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4
-; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2
+; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2
; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
+; CHECK-NEXT: v_mov_b32_e32 v7, 0x12d8fb
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CHECK-NEXT: v_mul_lo_u32 v7, v1, v4
+; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2
-; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2
+; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2
-; CHECK-NEXT: v_mul_lo_u32 v4, s6, v4
-; CHECK-NEXT: v_mul_lo_u32 v6, s6, v2
-; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2
+; CHECK-NEXT: v_mul_lo_u32 v4, s4, v4
+; CHECK-NEXT: v_mul_lo_u32 v6, s4, v2
+; CHECK-NEXT: v_mul_hi_u32 v2, s4, v2
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v1, v2, vcc
; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2
-; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0
+; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5]
-; CHECK-NEXT: v_mov_b32_e32 v5, s7
+; CHECK-NEXT: v_mov_b32_e32 v5, s6
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; CHECK-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[4:5]
-; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, s6, v0
+; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v7
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000
-; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v5
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v7
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
-; CHECK-NEXT: v_mov_b32_e32 v7, s4
+; CHECK-NEXT: v_mov_b32_e32 v8, s4
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CHECK-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
-; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s6, v5
+; CHECK-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
+; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v5, v7
; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
@@ -2171,41 +2175,166 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-LABEL: v_srem_v2i64_oddk_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x12d8fb
-; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
-; CGP-NEXT: s_mov_b32 s6, 0xffed2705
-; CGP-NEXT: s_mov_b32 s7, 0x12d8fb
-; CGP-NEXT: v_mov_b32_e32 v4, v5
-; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6
-; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v4
+; CGP-NEXT: s_mov_b32 s6, 0x12d8fb
+; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6
+; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
+; CGP-NEXT: s_mov_b32 s7, 0xffed2705
+; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000
+; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
+; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4
; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
-; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7
-; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7
-; CGP-NEXT: v_trunc_f32_e32 v8, v8
-; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8
-; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8
+; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
+; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5
+; CGP-NEXT: v_trunc_f32_e32 v6, v6
+; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6
+; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; CGP-NEXT: v_mul_lo_u32 v9, -1, v7
-; CGP-NEXT: v_mul_lo_u32 v10, s6, v8
-; CGP-NEXT: v_mul_hi_u32 v12, s6, v7
-; CGP-NEXT: v_mul_lo_u32 v11, s6, v7
+; CGP-NEXT: v_mul_lo_u32 v7, -1, v5
+; CGP-NEXT: v_mul_lo_u32 v8, s7, v6
+; CGP-NEXT: v_mul_hi_u32 v10, s7, v5
+; CGP-NEXT: v_mul_lo_u32 v9, s7, v5
; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_mul_lo_u32 v8, v6, v9
+; CGP-NEXT: v_mul_lo_u32 v10, v5, v7
+; CGP-NEXT: v_mul_hi_u32 v11, v5, v9
+; CGP-NEXT: v_mul_hi_u32 v9, v6, v9
+; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v11, v6, v7
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT: v_mul_hi_u32 v10, v5, v7
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8
+; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc
+; CGP-NEXT: v_mul_lo_u32 v7, -1, v5
+; CGP-NEXT: v_mul_lo_u32 v8, s7, v6
+; CGP-NEXT: v_mul_hi_u32 v10, s7, v5
+; CGP-NEXT: v_mul_lo_u32 v9, s7, v5
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_mul_lo_u32 v8, v6, v9
+; CGP-NEXT: v_mul_lo_u32 v10, v5, v7
+; CGP-NEXT: v_mul_hi_u32 v11, v5, v9
+; CGP-NEXT: v_mul_hi_u32 v9, v6, v9
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v11, v6, v7
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT: v_mul_hi_u32 v10, v5, v7
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
+; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8
+; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc
+; CGP-NEXT: v_mul_lo_u32 v7, v1, v8
+; CGP-NEXT: v_mul_lo_u32 v9, v0, v6
+; CGP-NEXT: v_mul_hi_u32 v10, v0, v8
+; CGP-NEXT: v_mul_hi_u32 v8, v1, v8
+; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v10, v1, v6
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7
+; CGP-NEXT: v_mul_hi_u32 v9, v0, v6
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; CGP-NEXT: v_mul_hi_u32 v6, v1, v6
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7
+; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8
+; CGP-NEXT: v_mul_lo_u32 v8, 0, v7
+; CGP-NEXT: v_mul_lo_u32 v6, s6, v6
+; CGP-NEXT: v_mul_lo_u32 v9, s6, v7
+; CGP-NEXT: v_mul_hi_u32 v7, s6, v7
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v9
+; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v6, vcc
+; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
+; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
+; CGP-NEXT: v_mov_b32_e32 v8, s8
+; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7
+; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[4:5]
+; CGP-NEXT: v_sub_i32_e32 v8, vcc, v0, v5
+; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
+; CGP-NEXT: v_mov_b32_e32 v10, s4
+; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
+; CGP-NEXT: v_sub_i32_e32 v10, vcc, v8, v5
+; CGP-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v1, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
+; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5
+; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
+; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
+; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6
+; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3
+; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
+; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6
+; CGP-NEXT: v_trunc_f32_e32 v8, v8
+; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8
+; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8
+; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7
+; CGP-NEXT: v_mul_lo_u32 v9, -1, v6
+; CGP-NEXT: v_mul_lo_u32 v10, s7, v8
+; CGP-NEXT: v_mul_hi_u32 v12, s7, v6
+; CGP-NEXT: v_mul_lo_u32 v11, s7, v6
+; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
; CGP-NEXT: v_mul_lo_u32 v10, v8, v11
-; CGP-NEXT: v_mul_lo_u32 v12, v7, v9
-; CGP-NEXT: v_mul_hi_u32 v13, v7, v11
+; CGP-NEXT: v_mul_lo_u32 v12, v6, v9
+; CGP-NEXT: v_mul_hi_u32 v13, v6, v11
; CGP-NEXT: v_mul_hi_u32 v11, v8, v11
-; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CGP-NEXT: v_mul_lo_u32 v13, v8, v9
; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT: v_mul_hi_u32 v12, v7, v9
+; CGP-NEXT: v_mul_hi_u32 v12, v6, v9
; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
@@ -2216,27 +2345,27 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc
-; CGP-NEXT: v_mul_lo_u32 v9, -1, v7
-; CGP-NEXT: v_mul_lo_u32 v10, s6, v8
-; CGP-NEXT: v_mul_hi_u32 v12, s6, v7
-; CGP-NEXT: v_mul_lo_u32 v11, s6, v7
-; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000
+; CGP-NEXT: v_mul_lo_u32 v9, -1, v6
+; CGP-NEXT: v_mul_lo_u32 v10, s7, v8
+; CGP-NEXT: v_mul_hi_u32 v12, s7, v6
+; CGP-NEXT: v_mul_lo_u32 v11, s7, v6
+; CGP-NEXT: v_xor_b32_e32 v2, v2, v7
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
; CGP-NEXT: v_mul_lo_u32 v10, v8, v11
-; CGP-NEXT: v_mul_lo_u32 v12, v7, v9
-; CGP-NEXT: v_mul_hi_u32 v13, v7, v11
+; CGP-NEXT: v_mul_lo_u32 v12, v6, v9
+; CGP-NEXT: v_mul_hi_u32 v13, v6, v11
; CGP-NEXT: v_mul_hi_u32 v11, v8, v11
-; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6
+; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CGP-NEXT: v_mul_lo_u32 v13, v8, v9
; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT: v_mul_hi_u32 v12, v7, v9
+; CGP-NEXT: v_mul_hi_u32 v12, v6, v9
; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
@@ -2247,191 +2376,68 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc
-; CGP-NEXT: v_mul_lo_u32 v9, v1, v7
-; CGP-NEXT: v_mul_lo_u32 v10, v0, v8
-; CGP-NEXT: v_mul_hi_u32 v11, v0, v7
-; CGP-NEXT: v_mul_hi_u32 v7, v1, v7
-; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v11, v1, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT: v_mul_hi_u32 v10, v0, v8
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT: v_mul_hi_u32 v8, v1, v8
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT: v_mul_lo_u32 v9, 0, v7
-; CGP-NEXT: v_mul_lo_u32 v8, s7, v8
-; CGP-NEXT: v_mul_lo_u32 v10, s7, v7
-; CGP-NEXT: v_mul_hi_u32 v7, s7, v7
-; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7
-; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10
-; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v1, v7, vcc
-; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v7
-; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0
-; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
-; CGP-NEXT: v_mov_b32_e32 v9, s8
-; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8
-; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[4:5]
-; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v0
-; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v9
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc
-; CGP-NEXT: v_mov_b32_e32 v11, s4
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CGP-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc
-; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s7, v9
-; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v1, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
-; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
-; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5
-; CGP-NEXT: v_trunc_f32_e32 v7, v7
-; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7
-; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
-; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
-; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
-; CGP-NEXT: v_mul_lo_u32 v8, -1, v5
-; CGP-NEXT: v_mul_lo_u32 v9, s6, v7
-; CGP-NEXT: v_mul_hi_u32 v11, s6, v5
-; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3
-; CGP-NEXT: v_mul_lo_u32 v10, s6, v5
-; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
-; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; CGP-NEXT: v_mul_lo_u32 v9, v7, v10
-; CGP-NEXT: v_mul_lo_u32 v11, v5, v8
-; CGP-NEXT: v_mul_hi_u32 v12, v5, v10
-; CGP-NEXT: v_mul_hi_u32 v10, v7, v10
-; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v12, v7, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
-; CGP-NEXT: v_mul_hi_u32 v11, v5, v8
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT: v_mul_hi_u32 v8, v7, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
-; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc
-; CGP-NEXT: v_mul_lo_u32 v8, -1, v5
-; CGP-NEXT: v_mul_lo_u32 v9, s6, v7
-; CGP-NEXT: v_mul_hi_u32 v11, s6, v5
-; CGP-NEXT: v_mul_lo_u32 v10, s6, v5
-; CGP-NEXT: v_xor_b32_e32 v2, v2, v6
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; CGP-NEXT: v_mul_lo_u32 v9, v7, v10
-; CGP-NEXT: v_mul_lo_u32 v11, v5, v8
-; CGP-NEXT: v_mul_hi_u32 v12, v5, v10
-; CGP-NEXT: v_mul_hi_u32 v10, v7, v10
-; CGP-NEXT: v_xor_b32_e32 v3, v3, v6
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v12, v7, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
-; CGP-NEXT: v_mul_hi_u32 v11, v5, v8
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
-; CGP-NEXT: v_mul_hi_u32 v8, v7, v8
-; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9
-; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
-; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc
; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
-; CGP-NEXT: v_mul_lo_u32 v8, v3, v5
-; CGP-NEXT: v_mul_lo_u32 v9, v2, v7
+; CGP-NEXT: v_mul_lo_u32 v9, v3, v6
+; CGP-NEXT: v_mul_lo_u32 v10, v2, v8
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc
-; CGP-NEXT: v_mul_hi_u32 v4, v2, v5
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4
-; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
-; CGP-NEXT: v_mul_lo_u32 v8, v3, v7
-; CGP-NEXT: v_mul_hi_u32 v5, v3, v5
+; CGP-NEXT: v_mul_hi_u32 v4, v2, v6
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4
-; CGP-NEXT: v_mul_hi_u32 v9, v2, v7
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
+; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; CGP-NEXT: v_mul_lo_u32 v9, v3, v8
+; CGP-NEXT: v_mul_hi_u32 v6, v3, v6
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
+; CGP-NEXT: v_mul_hi_u32 v10, v2, v8
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6
; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9
-; CGP-NEXT: v_mul_hi_u32 v7, v3, v7
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4
-; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; CGP-NEXT: v_mul_lo_u32 v7, 0, v4
-; CGP-NEXT: v_mul_lo_u32 v5, s7, v5
-; CGP-NEXT: v_mul_lo_u32 v8, s7, v4
-; CGP-NEXT: v_mul_hi_u32 v4, s7, v4
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
+; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; CGP-NEXT: v_mul_hi_u32 v8, v3, v8
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4
+; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6
+; CGP-NEXT: v_mul_lo_u32 v8, 0, v4
+; CGP-NEXT: v_mul_lo_u32 v6, s6, v6
+; CGP-NEXT: v_mul_lo_u32 v9, s6, v4
+; CGP-NEXT: v_mul_hi_u32 v4, s6, v4
; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8
-; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v3, v4, vcc
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6
+; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9
+; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4
-; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5]
-; CGP-NEXT: v_mov_b32_e32 v7, s6
-; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
+; CGP-NEXT: v_mov_b32_e32 v8, s6
+; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6
; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v4, v7, v4, s[4:5]
-; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s7, v2
+; CGP-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5]
+; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v5
; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v7
-; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
-; CGP-NEXT: v_mov_b32_e32 v9, s4
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5
+; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
+; CGP-NEXT: v_mov_b32_e32 v10, s4
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; CGP-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
-; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v7
+; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
+; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5
; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
-; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
+; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
-; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
-; CGP-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
-; CGP-NEXT: v_xor_b32_e32 v2, v2, v6
-; CGP-NEXT: v_xor_b32_e32 v3, v3, v6
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v6
-; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
+; CGP-NEXT: v_xor_b32_e32 v2, v2, v7
+; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7
+; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = srem <2 x i64> %num, <i64 1235195, i64 1235195>
ret <2 x i64> %result
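(The CGP sequence above reduces the signed remainder to an unsigned one: the dividend's sign mask, v_ashrrev_i32 ..., 31, is applied up front with add/xor, the unsigned oddk expansion runs on the magnitudes, and the trailing v_xor/v_sub/v_subb pairs restore the sign. A minimal C sketch of that identity, with the unsigned core left abstract; urem64 and srem_oddk are illustrative names, not code from this patch:

#include <stdint.h>

/* Reduction of srem to urem as encoded by the CGP checks above:
   take the magnitude, compute the unsigned remainder, then give
   the result the dividend's sign. */
uint64_t urem64(uint64_t n, uint64_t d); /* stands in for the expansion */

int64_t srem_oddk(int64_t n) {
    const uint64_t d = 1235195;                            /* 0x12d8fb */
    uint64_t mag = n < 0 ? 0u - (uint64_t)n : (uint64_t)n; /* add/xor with sign mask */
    uint64_t r = urem64(mag, d);
    return n < 0 ? -(int64_t)r : (int64_t)r;               /* trailing v_xor/v_sub pair */
}

The register renumbering in this hunk does not change that structure; it reflects the reworked legalization producing a different, shorter live-range layout.)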
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index ce21d2546b8a0..fdf5ba53330c1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -968,129 +968,130 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) {
; CHECK-LABEL: v_urem_i64_oddk_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb
+; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb
+; CHECK-NEXT: v_mov_b32_e32 v2, 0x12d8fb
; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0
-; CHECK-NEXT: s_mov_b32 s4, 0xffed2705
-; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb
-; CHECK-NEXT: s_bfe_i32 s5, -1, 0x10000
+; CHECK-NEXT: s_mov_b32 s5, 0xffed2705
+; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000
; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000
-; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
-; CHECK-NEXT: v_mov_b32_e32 v3, s5
-; CHECK-NEXT: v_mov_b32_e32 v4, s7
-; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
-; CHECK-NEXT: v_mul_f32_e32 v5, 0x2f800000, v2
-; CHECK-NEXT: v_trunc_f32_e32 v5, v5
-; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v5
-; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5
-; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
-; CHECK-NEXT: v_mul_lo_u32 v6, s4, v5
-; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2
-; CHECK-NEXT: v_mul_lo_u32 v8, -1, v2
-; CHECK-NEXT: v_mul_hi_u32 v9, s4, v2
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
-; CHECK-NEXT: v_mul_lo_u32 v8, v5, v7
-; CHECK-NEXT: v_mul_hi_u32 v10, v2, v7
-; CHECK-NEXT: v_mul_hi_u32 v7, v5, v7
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9
-; CHECK-NEXT: v_mul_lo_u32 v9, v2, v6
-; CHECK-NEXT: v_mul_lo_u32 v11, v5, v6
-; CHECK-NEXT: v_mul_hi_u32 v12, v2, v6
-; CHECK-NEXT: v_mul_hi_u32 v6, v5, v6
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9
+; CHECK-NEXT: v_cvt_f32_u32_e32 v4, s4
+; CHECK-NEXT: v_mov_b32_e32 v5, s6
+; CHECK-NEXT: v_mov_b32_e32 v6, s7
+; CHECK-NEXT: v_mac_f32_e32 v4, 0x4f800000, v3
+; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v4
+; CHECK-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3
+; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3
+; CHECK-NEXT: v_trunc_f32_e32 v4, v4
+; CHECK-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4
+; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4
+; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
+; CHECK-NEXT: v_mul_lo_u32 v7, s5, v4
+; CHECK-NEXT: v_mul_lo_u32 v8, s5, v3
+; CHECK-NEXT: v_mul_lo_u32 v9, -1, v3
+; CHECK-NEXT: v_mul_hi_u32 v10, s5, v3
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
+; CHECK-NEXT: v_mul_lo_u32 v9, v4, v8
+; CHECK-NEXT: v_mul_hi_u32 v11, v3, v8
+; CHECK-NEXT: v_mul_hi_u32 v8, v4, v8
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v10
+; CHECK-NEXT: v_mul_lo_u32 v10, v3, v7
+; CHECK-NEXT: v_mul_lo_u32 v12, v4, v7
+; CHECK-NEXT: v_mul_hi_u32 v13, v3, v7
+; CHECK-NEXT: v_mul_hi_u32 v7, v4, v7
+; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10
+; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v8, vcc, v12, v8
+; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11
; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v11, v7
+; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v13
; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; CHECK-NEXT: v_add_i32_e32 v10, vcc, v12, v11
+; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9
+; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v8
+; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v7, vcc
+; CHECK-NEXT: v_mul_lo_u32 v7, s5, v3
+; CHECK-NEXT: v_mul_lo_u32 v8, -1, v3
+; CHECK-NEXT: v_mul_hi_u32 v9, s5, v3
+; CHECK-NEXT: v_mul_lo_u32 v10, s5, v4
+; CHECK-NEXT: v_mul_lo_u32 v11, v4, v7
+; CHECK-NEXT: v_mul_hi_u32 v12, v3, v7
+; CHECK-NEXT: v_mul_hi_u32 v7, v4, v7
; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v12
+; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9
+; CHECK-NEXT: v_mul_lo_u32 v9, v3, v8
+; CHECK-NEXT: v_mul_lo_u32 v10, v4, v8
+; CHECK-NEXT: v_mul_hi_u32 v13, v3, v8
+; CHECK-NEXT: v_mul_hi_u32 v8, v4, v8
+; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9
+; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7
; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v10
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7
-; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v6, vcc
-; CHECK-NEXT: v_mul_lo_u32 v6, s4, v2
-; CHECK-NEXT: v_mul_lo_u32 v7, -1, v2
-; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2
-; CHECK-NEXT: v_mul_lo_u32 v9, s4, v5
-; CHECK-NEXT: v_mul_lo_u32 v10, v5, v6
-; CHECK-NEXT: v_mul_hi_u32 v11, v2, v6
-; CHECK-NEXT: v_mul_hi_u32 v6, v5, v6
+; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v12
+; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v13
+; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9
+; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12
; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8
-; CHECK-NEXT: v_mul_lo_u32 v8, v2, v7
-; CHECK-NEXT: v_mul_lo_u32 v9, v5, v7
-; CHECK-NEXT: v_mul_hi_u32 v12, v2, v7
-; CHECK-NEXT: v_mul_hi_u32 v7, v5, v7
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v9, v6
; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v12
-; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
-; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
-; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc
-; CHECK-NEXT: v_mul_lo_u32 v6, v1, v2
-; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2
-; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
-; CHECK-NEXT: v_mul_lo_u32 v8, v0, v5
-; CHECK-NEXT: v_mul_lo_u32 v9, v1, v5
-; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5
-; CHECK-NEXT: v_mul_hi_u32 v5, v1, v5
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
-; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2
+; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9
+; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7
+; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc
+; CHECK-NEXT: v_mul_lo_u32 v7, v1, v3
+; CHECK-NEXT: v_mul_hi_u32 v8, v0, v3
+; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3
+; CHECK-NEXT: v_mul_lo_u32 v9, v0, v4
+; CHECK-NEXT: v_mul_lo_u32 v10, v1, v4
+; CHECK-NEXT: v_mul_hi_u32 v11, v0, v4
+; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9
; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
-; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v10
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v10, v3
+; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8
; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v11
+; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
-; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
-; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2
-; CHECK-NEXT: v_mul_lo_u32 v8, 0, v2
-; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
-; CHECK-NEXT: v_mul_lo_u32 v5, s6, v5
-; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2
-; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
-; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v1, v2, vcc
-; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2
-; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0
-; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5]
-; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
-; CHECK-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[4:5]
+; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7
+; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
+; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
+; CHECK-NEXT: v_mul_lo_u32 v8, s4, v3
+; CHECK-NEXT: v_mul_lo_u32 v9, 0, v3
+; CHECK-NEXT: v_mul_hi_u32 v3, s4, v3
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7
+; CHECK-NEXT: v_mul_lo_u32 v4, s4, v4
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3
+; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
+; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v1, v3, vcc
+; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3
+; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2
+; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5]
+; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
+; CHECK-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s6, v0
+; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v3
-; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v2
+; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, s6, v3
+; CHECK-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
+; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v5, v2
; CHECK-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
-; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
-; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = urem i64 %num, 1235195
ret i64 %result
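(The float magic numbers threaded through these CHECK lines seed a 64-bit reciprocal of the denominator from a single v_rcp_iflag_f32: 0x4f800000 is 2^32, 0x5f7ffffc is just under 2^64, 0x2f800000 is 2^-32, and 0xcf800000 is -2^32. A C sketch of the seeding prologue, under the assumption that this reading of the expansion is right; recip64_seed is an illustrative name, not code from this patch, and the long integer mul/add ladders that follow in the checks are the refinement rounds omitted here:

#include <stdint.h>
#include <math.h>

/* Build an initial 64-bit reciprocal estimate of d from one 32-bit
   float reciprocal, mirroring the v_cvt_f32_u32 / v_mac_f32 /
   v_rcp_iflag_f32 / v_trunc_f32 sequence in the checks above. */
uint64_t recip64_seed(uint64_t d) {
    /* lo + 2^32 * hi: the v_mac_f32 with 0x4f800000 (= 2^32) */
    float f = (float)(uint32_t)d + 0x1.0p32f * (float)(uint32_t)(d >> 32);
    float r = (1.0f / f) * 0x1.fffff8p63f;  /* scale by 0x5f7ffffc (~2^64) */
    float hi = truncf(r * 0x1.0p-32f);      /* 0x2f800000 (= 2^-32), then v_trunc */
    float lo = r - hi * 0x1.0p32f;          /* v_mac_f32 with 0xcf800000 (= -2^32) */
    return ((uint64_t)(uint32_t)hi << 32) | (uint32_t)lo;
}

The diff itself only rebalances which operands live in SGPRs versus VGPRs (e.g. v_cmp_le_u32 against s6 becoming v_cmp_ge_u32 against v2); the arithmetic is unchanged.)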
@@ -1357,63 +1358,64 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-LABEL: v_urem_v2i64_oddk_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb
+; CGP-NEXT: s_mov_b32 s8, 0x12d8fb
+; CGP-NEXT: v_mov_b32_e32 v4, 0x12d8fb
; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
; CGP-NEXT: s_mov_b32 s6, 0xffed2705
-; CGP-NEXT: s_mov_b32 s8, 0x12d8fb
; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000
; CGP-NEXT: s_bfe_i32 s5, -1, 0x10000
+; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
; CGP-NEXT: s_bfe_i32 s7, -1, 0x10000
; CGP-NEXT: s_bfe_i32 s9, -1, 0x10000
-; CGP-NEXT: v_mov_b32_e32 v6, v4
-; CGP-NEXT: v_mov_b32_e32 v7, s4
-; CGP-NEXT: v_mov_b32_e32 v8, s5
-; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
-; CGP-NEXT: v_mov_b32_e32 v9, s7
-; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5
-; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
-; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6
-; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
+; CGP-NEXT: v_cvt_f32_u32_e32 v7, s8
+; CGP-NEXT: v_mov_b32_e32 v8, s4
+; CGP-NEXT: v_mov_b32_e32 v9, s5
+; CGP-NEXT: v_cvt_f32_u32_e32 v10, v4
+; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v5
+; CGP-NEXT: v_mac_f32_e32 v10, 0x4f800000, v6
+; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v7
+; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v10
; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4
-; CGP-NEXT: v_mul_f32_e32 v10, 0x2f800000, v5
-; CGP-NEXT: v_trunc_f32_e32 v6, v6
+; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
+; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5
+; CGP-NEXT: v_mul_f32_e32 v10, 0x2f800000, v6
+; CGP-NEXT: v_trunc_f32_e32 v7, v7
; CGP-NEXT: v_trunc_f32_e32 v10, v10
-; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6
-; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
-; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v10
+; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7
+; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
+; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v10
; CGP-NEXT: v_cvt_u32_f32_e32 v10, v10
-; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
-; CGP-NEXT: v_mul_lo_u32 v11, s6, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT: v_mul_lo_u32 v11, s6, v7
+; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT: v_mul_lo_u32 v12, s6, v10
-; CGP-NEXT: v_mul_lo_u32 v13, s6, v4
-; CGP-NEXT: v_mul_lo_u32 v14, -1, v4
-; CGP-NEXT: v_mul_hi_u32 v15, s6, v4
-; CGP-NEXT: v_mul_lo_u32 v16, s6, v5
-; CGP-NEXT: v_mul_lo_u32 v17, -1, v5
-; CGP-NEXT: v_mul_hi_u32 v18, s6, v5
+; CGP-NEXT: v_mul_lo_u32 v13, s6, v5
+; CGP-NEXT: v_mul_lo_u32 v14, -1, v5
+; CGP-NEXT: v_mul_hi_u32 v15, s6, v5
+; CGP-NEXT: v_mul_lo_u32 v16, s6, v6
+; CGP-NEXT: v_mul_lo_u32 v17, -1, v6
+; CGP-NEXT: v_mul_hi_u32 v18, s6, v6
; CGP-NEXT: v_add_i32_e32 v11, vcc, v14, v11
-; CGP-NEXT: v_mul_lo_u32 v14, v6, v13
-; CGP-NEXT: v_mul_hi_u32 v19, v4, v13
-; CGP-NEXT: v_mul_hi_u32 v13, v6, v13
+; CGP-NEXT: v_mul_lo_u32 v14, v7, v13
+; CGP-NEXT: v_mul_hi_u32 v19, v5, v13
+; CGP-NEXT: v_mul_hi_u32 v13, v7, v13
; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12
; CGP-NEXT: v_mul_lo_u32 v17, v10, v16
; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15
-; CGP-NEXT: v_mul_hi_u32 v15, v5, v16
+; CGP-NEXT: v_mul_hi_u32 v15, v6, v16
; CGP-NEXT: v_mul_hi_u32 v16, v10, v16
; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v18
-; CGP-NEXT: v_mul_lo_u32 v18, v5, v12
+; CGP-NEXT: v_mul_lo_u32 v18, v6, v12
; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v18
; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v15
-; CGP-NEXT: v_mul_lo_u32 v15, v4, v11
-; CGP-NEXT: v_mul_lo_u32 v17, v6, v11
+; CGP-NEXT: v_mul_lo_u32 v15, v5, v11
+; CGP-NEXT: v_mul_lo_u32 v17, v7, v11
; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v15
; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v19
-; CGP-NEXT: v_mul_hi_u32 v14, v4, v11
-; CGP-NEXT: v_mul_hi_u32 v11, v6, v11
+; CGP-NEXT: v_mul_hi_u32 v14, v5, v11
+; CGP-NEXT: v_mul_hi_u32 v11, v7, v11
; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
; CGP-NEXT: v_add_i32_e64 v15, s[4:5], v15, v19
; CGP-NEXT: v_mul_lo_u32 v19, v10, v12
@@ -1424,7 +1426,7 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v17, v14
; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17
-; CGP-NEXT: v_mul_hi_u32 v18, v5, v12
+; CGP-NEXT: v_mul_hi_u32 v18, v6, v12
; CGP-NEXT: v_mul_hi_u32 v12, v10, v12
; CGP-NEXT: v_add_i32_e32 v16, vcc, v19, v16
; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
@@ -1439,38 +1441,38 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_add_i32_e32 v15, vcc, v18, v17
; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v14
; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13
-; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v11, vcc
-; CGP-NEXT: v_mul_lo_u32 v11, s6, v4
-; CGP-NEXT: v_mul_lo_u32 v13, -1, v4
-; CGP-NEXT: v_mul_hi_u32 v14, s6, v4
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v16
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13
+; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc
+; CGP-NEXT: v_mul_lo_u32 v11, s6, v5
+; CGP-NEXT: v_mul_lo_u32 v13, -1, v5
+; CGP-NEXT: v_mul_hi_u32 v14, s6, v5
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v16
; CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v12, vcc
-; CGP-NEXT: v_mul_lo_u32 v12, s6, v5
-; CGP-NEXT: v_mul_lo_u32 v15, -1, v5
-; CGP-NEXT: v_mul_hi_u32 v16, s6, v5
-; CGP-NEXT: v_mul_lo_u32 v17, s6, v6
-; CGP-NEXT: v_mul_lo_u32 v18, v6, v11
-; CGP-NEXT: v_mul_hi_u32 v19, v4, v11
-; CGP-NEXT: v_mul_hi_u32 v11, v6, v11
+; CGP-NEXT: v_mul_lo_u32 v12, s6, v6
+; CGP-NEXT: v_mul_lo_u32 v15, -1, v6
+; CGP-NEXT: v_mul_hi_u32 v16, s6, v6
+; CGP-NEXT: v_mul_lo_u32 v17, s6, v7
+; CGP-NEXT: v_mul_lo_u32 v18, v7, v11
+; CGP-NEXT: v_mul_hi_u32 v19, v5, v11
+; CGP-NEXT: v_mul_hi_u32 v11, v7, v11
; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17
; CGP-NEXT: v_mul_lo_u32 v17, s6, v10
; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17
; CGP-NEXT: v_mul_lo_u32 v17, v10, v12
; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14
-; CGP-NEXT: v_mul_hi_u32 v14, v5, v12
+; CGP-NEXT: v_mul_hi_u32 v14, v6, v12
; CGP-NEXT: v_mul_hi_u32 v12, v10, v12
; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16
-; CGP-NEXT: v_mul_lo_u32 v16, v5, v15
+; CGP-NEXT: v_mul_lo_u32 v16, v6, v15
; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16
; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14
-; CGP-NEXT: v_mul_lo_u32 v14, v4, v13
-; CGP-NEXT: v_mul_lo_u32 v16, v6, v13
+; CGP-NEXT: v_mul_lo_u32 v14, v5, v13
+; CGP-NEXT: v_mul_lo_u32 v16, v7, v13
; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v18, v14
; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v19
-; CGP-NEXT: v_mul_hi_u32 v14, v4, v13
+; CGP-NEXT: v_mul_hi_u32 v14, v5, v13
; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
; CGP-NEXT: v_add_i32_e64 v18, s[4:5], v18, v19
; CGP-NEXT: v_mul_lo_u32 v19, v10, v15
@@ -1481,125 +1483,126 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v16, v14
; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16
-; CGP-NEXT: v_mul_hi_u32 v17, v5, v15
+; CGP-NEXT: v_mul_hi_u32 v17, v6, v15
; CGP-NEXT: v_add_i32_e32 v12, vcc, v19, v12
; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v17
; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v17, vcc, v19, v17
-; CGP-NEXT: v_mov_b32_e32 v19, s9
-; CGP-NEXT: v_mul_hi_u32 v13, v6, v13
-; CGP-NEXT: v_mul_hi_u32 v15, v10, v15
+; CGP-NEXT: v_mov_b32_e32 v19, s7
; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v18
; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v18
+; CGP-NEXT: v_mov_b32_e32 v18, s9
+; CGP-NEXT: v_mul_hi_u32 v13, v7, v13
+; CGP-NEXT: v_mul_hi_u32 v15, v10, v15
; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16
; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v18
; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16
; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14
; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v16
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11
-; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v13, vcc
-; CGP-NEXT: v_mul_lo_u32 v11, v3, v4
-; CGP-NEXT: v_mul_hi_u32 v13, v2, v4
-; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12
-; CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v14, vcc
-; CGP-NEXT: v_mul_lo_u32 v12, v1, v5
-; CGP-NEXT: v_mul_hi_u32 v14, v0, v5
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11
+; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v13, vcc
+; CGP-NEXT: v_mul_lo_u32 v11, v1, v5
+; CGP-NEXT: v_mul_hi_u32 v13, v0, v5
; CGP-NEXT: v_mul_hi_u32 v5, v1, v5
-; CGP-NEXT: v_mul_lo_u32 v15, v2, v6
-; CGP-NEXT: v_mul_lo_u32 v16, v3, v6
-; CGP-NEXT: v_mul_hi_u32 v17, v2, v6
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v12
+; CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v14, vcc
+; CGP-NEXT: v_mul_lo_u32 v12, v3, v6
+; CGP-NEXT: v_mul_hi_u32 v14, v2, v6
; CGP-NEXT: v_mul_hi_u32 v6, v3, v6
-; CGP-NEXT: v_mul_lo_u32 v18, v0, v10
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v18
-; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
-; CGP-NEXT: v_mul_lo_u32 v12, v1, v10
-; CGP-NEXT: v_mul_hi_u32 v14, v0, v10
-; CGP-NEXT: v_mul_hi_u32 v10, v1, v10
-; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15
-; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v16, v4
-; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v12, v5
+; CGP-NEXT: v_mul_lo_u32 v15, v0, v7
+; CGP-NEXT: v_mul_lo_u32 v16, v1, v7
+; CGP-NEXT: v_mul_hi_u32 v17, v0, v7
+; CGP-NEXT: v_mul_hi_u32 v7, v1, v7
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15
+; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13
+; CGP-NEXT: v_mul_lo_u32 v11, v2, v10
+; CGP-NEXT: v_mul_lo_u32 v13, v3, v10
+; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v13
-; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v17
-; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5]
+; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14
+; CGP-NEXT: v_mul_hi_u32 v11, v2, v10
+; CGP-NEXT: v_mul_hi_u32 v10, v3, v10
+; CGP-NEXT: v_add_i32_e64 v5, s[6:7], v16, v5
+; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7]
+; CGP-NEXT: v_add_i32_e64 v6, s[6:7], v13, v6
+; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[6:7]
+; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v17
; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14
-; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11
-; CGP-NEXT: v_add_i32_e32 v13, vcc, v16, v13
-; CGP-NEXT: v_add_i32_e32 v15, vcc, v18, v17
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11
+; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16
+; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v15
-; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17
+; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16
; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11
-; CGP-NEXT: v_mul_lo_u32 v13, s8, v4
-; CGP-NEXT: v_mul_lo_u32 v15, 0, v4
-; CGP-NEXT: v_mul_hi_u32 v4, s8, v4
-; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v15
+; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v12
+; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
+; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
; CGP-NEXT: v_mul_lo_u32 v14, s8, v5
-; CGP-NEXT: v_mul_lo_u32 v16, 0, v5
+; CGP-NEXT: v_mul_lo_u32 v15, 0, v5
; CGP-NEXT: v_mul_hi_u32 v5, s8, v5
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11
-; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12
-; CGP-NEXT: v_mul_lo_u32 v6, s8, v6
+; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
+; CGP-NEXT: v_mul_lo_u32 v12, s8, v6
+; CGP-NEXT: v_mul_lo_u32 v16, 0, v6
+; CGP-NEXT: v_mul_hi_u32 v6, s8, v6
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v13
+; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11
+; CGP-NEXT: v_mul_lo_u32 v7, s8, v7
; CGP-NEXT: v_mul_lo_u32 v10, s8, v10
-; CGP-NEXT: v_add_i32_e32 v6, vcc, v15, v6
+; CGP-NEXT: v_add_i32_e32 v7, vcc, v15, v7
; CGP-NEXT: v_add_i32_e32 v10, vcc, v16, v10
-; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4
-; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5
-; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v13
-; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
-; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4
-; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v2
-; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5]
-; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v14
-; CGP-NEXT: v_subb_u32_e64 v10, s[6:7], v1, v5, s[4:5]
-; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v5
-; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v0
-; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[6:7]
-; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v6
-; CGP-NEXT: v_cndmask_b32_e64 v4, v9, v4, s[6:7]
-; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
+; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6
+; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v14
+; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v5, vcc
+; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
+; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5]
+; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v12
+; CGP-NEXT: v_subb_u32_e64 v10, s[6:7], v3, v6, s[4:5]
+; CGP-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v6
+; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v4
+; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[6:7]
+; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7
+; CGP-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[6:7]
+; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10
-; CGP-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc
-; CGP-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
-; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s8, v2
-; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v7
-; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
-; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s8, v0
+; CGP-NEXT: v_cndmask_b32_e32 v6, v19, v6, vcc
+; CGP-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5]
+; CGP-NEXT: v_sub_i32_e32 v8, vcc, v0, v4
; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v11
-; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc
-; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
-; CGP-NEXT: v_cndmask_b32_e32 v9, v19, v9, vcc
-; CGP-NEXT: v_subrev_i32_e32 v13, vcc, s8, v7
-; CGP-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4
+; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
+; CGP-NEXT: v_sub_i32_e32 v12, vcc, v2, v4
+; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v12, v4
+; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc
-; CGP-NEXT: v_subrev_i32_e32 v12, vcc, s8, v11
-; CGP-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v1, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
+; CGP-NEXT: v_sub_i32_e32 v11, vcc, v8, v4
+; CGP-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v1, vcc
+; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
+; CGP-NEXT: v_cndmask_b32_e32 v13, v18, v13, vcc
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, v12, v4
+; CGP-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v3, vcc
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
-; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v13, vcc
-; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v8
-; CGP-NEXT: v_cndmask_b32_e64 v8, v11, v12, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v14, vcc
-; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
-; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
-; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v15, s[4:5]
-; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5
-; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e64 v1, v10, v1, s[4:5]
-; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
+; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v11, vcc
+; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13
+; CGP-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc
+; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v15, s[4:5]
+; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v6
+; CGP-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5]
+; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; CGP-NEXT: v_cndmask_b32_e64 v3, v10, v3, s[4:5]
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = urem <2 x i64> %num, <i64 1235195, i64 1235195>
ret <2 x i64> %result
diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-atomics.ll
index e04eec3e3ab7a..b27943195857e 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-min-max-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-atomics.ll
@@ -428,30 +428,31 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f32_off4_slc(<4 x i32> inre
;
; G_GFX7-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; G_GFX7: ; %bb.0: ; %main_body
-; G_GFX7-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0xd
+; G_GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
; G_GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
-; G_GFX7-NEXT: s_load_dword s2, s[0:1], 0xf
+; G_GFX7-NEXT: s_load_dword s0, s[0:1], 0xf
; G_GFX7-NEXT: s_mov_b32 m0, -1
; G_GFX7-NEXT: s_waitcnt lgkmcnt(0)
-; G_GFX7-NEXT: v_mov_b32_e32 v0, s12
-; G_GFX7-NEXT: v_mov_b32_e32 v1, s13
+; G_GFX7-NEXT: v_mov_b32_e32 v0, s2
+; G_GFX7-NEXT: v_mov_b32_e32 v1, s3
; G_GFX7-NEXT: buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc
-; G_GFX7-NEXT: v_mov_b32_e32 v1, s2
+; G_GFX7-NEXT: v_mov_b32_e32 v1, s0
; G_GFX7-NEXT: s_waitcnt vmcnt(0)
; G_GFX7-NEXT: ds_write_b32 v1, v0
; G_GFX7-NEXT: s_endpgm
;
; G_GFX10-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; G_GFX10: ; %bb.0: ; %main_body
-; G_GFX10-NEXT: s_clause 0x2
-; G_GFX10-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x34
+; G_GFX10-NEXT: s_clause 0x1
+; G_GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; G_GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
-; G_GFX10-NEXT: s_load_dword s2, s[0:1], 0x3c
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; G_GFX10-NEXT: v_mov_b32_e32 v0, s12
-; G_GFX10-NEXT: v_mov_b32_e32 v1, s13
+; G_GFX10-NEXT: v_mov_b32_e32 v0, s2
+; G_GFX10-NEXT: v_mov_b32_e32 v1, s3
+; G_GFX10-NEXT: s_load_dword s0, s[0:1], 0x3c
; G_GFX10-NEXT: buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc
-; G_GFX10-NEXT: v_mov_b32_e32 v1, s2
+; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; G_GFX10-NEXT: v_mov_b32_e32 v1, s0
; G_GFX10-NEXT: s_waitcnt vmcnt(0)
; G_GFX10-NEXT: ds_write_b32 v1, v0
; G_GFX10-NEXT: s_endpgm
@@ -459,14 +460,14 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f32_off4_slc(<4 x i32> inre
; G_GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; G_GFX1030: ; %bb.0: ; %main_body
; G_GFX1030-NEXT: s_clause 0x2
-; G_GFX1030-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x34
+; G_GFX1030-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; G_GFX1030-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
-; G_GFX1030-NEXT: s_load_dword s2, s[0:1], 0x3c
+; G_GFX1030-NEXT: s_load_dword s0, s[0:1], 0x3c
; G_GFX1030-NEXT: s_waitcnt lgkmcnt(0)
-; G_GFX1030-NEXT: v_mov_b32_e32 v0, s12
-; G_GFX1030-NEXT: v_mov_b32_e32 v1, s13
+; G_GFX1030-NEXT: v_mov_b32_e32 v0, s2
+; G_GFX1030-NEXT: v_mov_b32_e32 v1, s3
; G_GFX1030-NEXT: buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc
-; G_GFX1030-NEXT: v_mov_b32_e32 v1, s2
+; G_GFX1030-NEXT: v_mov_b32_e32 v1, s0
; G_GFX1030-NEXT: s_waitcnt vmcnt(0)
; G_GFX1030-NEXT: ds_write_b32 v1, v0
; G_GFX1030-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll
index e879fce3f4afd..91a403ff983a9 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll
@@ -244,25 +244,25 @@ define amdgpu_kernel void @lds_ds_fmin(float addrspace(5)* %out, float addrspace
; G_GFX9: ; %bb.0:
; G_GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; G_GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
-; G_GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c
-; G_GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24
; G_GFX9-NEXT: s_mov_b32 s10, -1
; G_GFX9-NEXT: s_mov_b32 s11, 0xe00000
; G_GFX9-NEXT: s_add_u32 s8, s8, s3
+; G_GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; G_GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; G_GFX9-NEXT: s_addc_u32 s9, s9, 0
-; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; G_GFX9-NEXT: s_add_i32 s0, s2, 4
-; G_GFX9-NEXT: s_lshl_b32 s1, s0, 3
-; G_GFX9-NEXT: v_mov_b32_e32 v0, s1
; G_GFX9-NEXT: v_mov_b32_e32 v1, 0x42280000
+; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; G_GFX9-NEXT: s_add_i32 s4, s4, 4
+; G_GFX9-NEXT: s_lshl_b32 s0, s4, 3
+; G_GFX9-NEXT: v_mov_b32_e32 v0, s0
; G_GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1
-; G_GFX9-NEXT: s_lshl_b32 s0, s0, 4
+; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4
; G_GFX9-NEXT: v_mov_b32_e32 v2, s0
; G_GFX9-NEXT: ds_min_rtn_f32 v1, v2, v1
; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; G_GFX9-NEXT: v_mov_b32_e32 v1, s7
+; G_GFX9-NEXT: v_mov_b32_e32 v1, s3
; G_GFX9-NEXT: ds_min_rtn_f32 v0, v1, v0
-; G_GFX9-NEXT: v_mov_b32_e32 v1, s6
+; G_GFX9-NEXT: v_mov_b32_e32 v1, s2
; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
; G_GFX9-NEXT: s_endpgm
@@ -533,25 +533,25 @@ define amdgpu_kernel void @lds_ds_fmax(float addrspace(5)* %out, float addrspace
; G_GFX9: ; %bb.0:
; G_GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; G_GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
-; G_GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c
-; G_GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24
; G_GFX9-NEXT: s_mov_b32 s10, -1
; G_GFX9-NEXT: s_mov_b32 s11, 0xe00000
; G_GFX9-NEXT: s_add_u32 s8, s8, s3
+; G_GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; G_GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; G_GFX9-NEXT: s_addc_u32 s9, s9, 0
-; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; G_GFX9-NEXT: s_add_i32 s0, s2, 4
-; G_GFX9-NEXT: s_lshl_b32 s1, s0, 3
-; G_GFX9-NEXT: v_mov_b32_e32 v0, s1
; G_GFX9-NEXT: v_mov_b32_e32 v1, 0x42280000
+; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; G_GFX9-NEXT: s_add_i32 s4, s4, 4
+; G_GFX9-NEXT: s_lshl_b32 s0, s4, 3
+; G_GFX9-NEXT: v_mov_b32_e32 v0, s0
; G_GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1
-; G_GFX9-NEXT: s_lshl_b32 s0, s0, 4
+; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4
; G_GFX9-NEXT: v_mov_b32_e32 v2, s0
; G_GFX9-NEXT: ds_max_rtn_f32 v1, v2, v1
; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; G_GFX9-NEXT: v_mov_b32_e32 v1, s7
+; G_GFX9-NEXT: v_mov_b32_e32 v1, s3
; G_GFX9-NEXT: ds_max_rtn_f32 v0, v1, v0
-; G_GFX9-NEXT: v_mov_b32_e32 v1, s6
+; G_GFX9-NEXT: v_mov_b32_e32 v1, s2
; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
; G_GFX9-NEXT: s_endpgm
@@ -800,23 +800,23 @@ define amdgpu_kernel void @lds_ds_fmin_f64(double addrspace(5)* %out, double add
; G_GFX7: ; %bb.0:
; G_GFX7-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; G_GFX7-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
-; G_GFX7-NEXT: s_load_dword s2, s[0:1], 0xb
+; G_GFX7-NEXT: s_load_dword s4, s[0:1], 0xb
; G_GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; G_GFX7-NEXT: s_mov_b32 s10, -1
; G_GFX7-NEXT: s_mov_b32 s11, 0xe8f000
; G_GFX7-NEXT: s_add_u32 s8, s8, s3
+; G_GFX7-NEXT: s_mov_b32 s2, 0
; G_GFX7-NEXT: s_addc_u32 s9, s9, 0
-; G_GFX7-NEXT: s_mov_b32 s4, 0
+; G_GFX7-NEXT: s_mov_b32 s3, 0x40450000
+; G_GFX7-NEXT: v_mov_b32_e32 v0, s2
; G_GFX7-NEXT: s_waitcnt lgkmcnt(0)
-; G_GFX7-NEXT: s_add_i32 s2, s2, 4
-; G_GFX7-NEXT: s_mov_b32 s5, 0x40450000
-; G_GFX7-NEXT: v_mov_b32_e32 v0, s4
-; G_GFX7-NEXT: s_lshl_b32 s3, s2, 3
-; G_GFX7-NEXT: v_mov_b32_e32 v1, s5
-; G_GFX7-NEXT: v_mov_b32_e32 v2, s3
+; G_GFX7-NEXT: s_add_i32 s4, s4, 4
+; G_GFX7-NEXT: v_mov_b32_e32 v1, s3
+; G_GFX7-NEXT: s_lshl_b32 s2, s4, 3
+; G_GFX7-NEXT: v_mov_b32_e32 v2, s2
; G_GFX7-NEXT: s_mov_b32 m0, -1
; G_GFX7-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1]
-; G_GFX7-NEXT: s_lshl_b32 s2, s2, 4
+; G_GFX7-NEXT: s_lshl_b32 s2, s4, 4
; G_GFX7-NEXT: v_mov_b32_e32 v4, s2
; G_GFX7-NEXT: ds_min_rtn_f64 v[0:1], v4, v[0:1]
; G_GFX7-NEXT: s_waitcnt lgkmcnt(0)
@@ -834,23 +834,23 @@ define amdgpu_kernel void @lds_ds_fmin_f64(double addrspace(5)* %out, double add
; G_VI: ; %bb.0:
; G_VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0
; G_VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1
-; G_VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; G_VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; G_VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; G_VI-NEXT: s_mov_b32 s90, -1
; G_VI-NEXT: s_mov_b32 s91, 0xe80000
; G_VI-NEXT: s_add_u32 s88, s88, s3
+; G_VI-NEXT: s_mov_b32 s2, 0
; G_VI-NEXT: s_addc_u32 s89, s89, 0
-; G_VI-NEXT: s_mov_b32 s4, 0
+; G_VI-NEXT: s_mov_b32 s3, 0x40450000
+; G_VI-NEXT: v_mov_b32_e32 v0, s2
; G_VI-NEXT: s_waitcnt lgkmcnt(0)
-; G_VI-NEXT: s_add_i32 s2, s2, 4
-; G_VI-NEXT: s_mov_b32 s5, 0x40450000
-; G_VI-NEXT: v_mov_b32_e32 v0, s4
-; G_VI-NEXT: s_lshl_b32 s3, s2, 3
-; G_VI-NEXT: v_mov_b32_e32 v1, s5
-; G_VI-NEXT: v_mov_b32_e32 v2, s3
+; G_VI-NEXT: s_add_i32 s4, s4, 4
+; G_VI-NEXT: v_mov_b32_e32 v1, s3
+; G_VI-NEXT: s_lshl_b32 s2, s4, 3
+; G_VI-NEXT: v_mov_b32_e32 v2, s2
; G_VI-NEXT: s_mov_b32 m0, -1
; G_VI-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1]
-; G_VI-NEXT: s_lshl_b32 s2, s2, 4
+; G_VI-NEXT: s_lshl_b32 s2, s4, 4
; G_VI-NEXT: v_mov_b32_e32 v4, s2
; G_VI-NEXT: ds_min_rtn_f64 v[0:1], v4, v[0:1]
; G_VI-NEXT: s_waitcnt lgkmcnt(0)
@@ -868,28 +868,28 @@ define amdgpu_kernel void @lds_ds_fmin_f64(double addrspace(5)* %out, double add
; G_GFX9: ; %bb.0:
; G_GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; G_GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
-; G_GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c
-; G_GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24
; G_GFX9-NEXT: s_mov_b32 s10, -1
; G_GFX9-NEXT: s_mov_b32 s11, 0xe00000
-; G_GFX9-NEXT: s_mov_b32 s0, 0
; G_GFX9-NEXT: s_add_u32 s8, s8, s3
+; G_GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; G_GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; G_GFX9-NEXT: s_mov_b32 s0, 0
+; G_GFX9-NEXT: s_addc_u32 s9, s9, 0
; G_GFX9-NEXT: s_mov_b32 s1, 0x40450000
; G_GFX9-NEXT: v_mov_b32_e32 v0, s0
-; G_GFX9-NEXT: s_addc_u32 s9, s9, 0
-; G_GFX9-NEXT: v_mov_b32_e32 v1, s1
; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; G_GFX9-NEXT: s_add_i32 s0, s2, 4
-; G_GFX9-NEXT: s_lshl_b32 s1, s0, 3
-; G_GFX9-NEXT: v_mov_b32_e32 v2, s1
+; G_GFX9-NEXT: s_add_i32 s4, s4, 4
+; G_GFX9-NEXT: v_mov_b32_e32 v1, s1
+; G_GFX9-NEXT: s_lshl_b32 s0, s4, 3
+; G_GFX9-NEXT: v_mov_b32_e32 v2, s0
; G_GFX9-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1]
-; G_GFX9-NEXT: s_lshl_b32 s0, s0, 4
+; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4
; G_GFX9-NEXT: v_mov_b32_e32 v5, s0
-; G_GFX9-NEXT: v_mov_b32_e32 v4, s7
+; G_GFX9-NEXT: v_mov_b32_e32 v4, s3
; G_GFX9-NEXT: ds_min_rtn_f64 v[0:1], v5, v[0:1]
; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX9-NEXT: ds_min_rtn_f64 v[0:1], v4, v[2:3]
-; G_GFX9-NEXT: v_mov_b32_e32 v2, s6
+; G_GFX9-NEXT: v_mov_b32_e32 v2, s2
; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX9-NEXT: buffer_store_dword v0, v2, s[8:11], 0 offen
; G_GFX9-NEXT: buffer_store_dword v1, v2, s[8:11], 0 offen offset:4
@@ -897,31 +897,31 @@ define amdgpu_kernel void @lds_ds_fmin_f64(double addrspace(5)* %out, double add
;
; G_GFX10-LABEL: lds_ds_fmin_f64:
; G_GFX10: ; %bb.0:
-; G_GFX10-NEXT: s_clause 0x1
-; G_GFX10-NEXT: s_load_dword s2, s[0:1], 0x2c
-; G_GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24
; G_GFX10-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; G_GFX10-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
; G_GFX10-NEXT: s_mov_b32 s10, -1
; G_GFX10-NEXT: s_mov_b32 s11, 0x31c16000
; G_GFX10-NEXT: s_add_u32 s8, s8, s3
+; G_GFX10-NEXT: s_clause 0x1
+; G_GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; G_GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; G_GFX10-NEXT: s_addc_u32 s9, s9, 0
; G_GFX10-NEXT: s_mov_b32 s0, 0
; G_GFX10-NEXT: s_mov_b32 s1, 0x40450000
; G_GFX10-NEXT: v_mov_b32_e32 v0, s0
; G_GFX10-NEXT: v_mov_b32_e32 v1, s1
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; G_GFX10-NEXT: s_add_i32 s2, s2, 4
-; G_GFX10-NEXT: v_mov_b32_e32 v5, s7
-; G_GFX10-NEXT: s_lshl_b32 s3, s2, 3
-; G_GFX10-NEXT: s_lshl_b32 s0, s2, 4
-; G_GFX10-NEXT: v_mov_b32_e32 v2, s3
+; G_GFX10-NEXT: s_add_i32 s4, s4, 4
+; G_GFX10-NEXT: v_mov_b32_e32 v5, s3
+; G_GFX10-NEXT: s_lshl_b32 s5, s4, 3
+; G_GFX10-NEXT: s_lshl_b32 s0, s4, 4
+; G_GFX10-NEXT: v_mov_b32_e32 v2, s5
; G_GFX10-NEXT: v_mov_b32_e32 v4, s0
; G_GFX10-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1]
; G_GFX10-NEXT: ds_min_rtn_f64 v[0:1], v4, v[0:1]
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT: ds_min_rtn_f64 v[0:1], v5, v[2:3]
-; G_GFX10-NEXT: v_mov_b32_e32 v2, s6
+; G_GFX10-NEXT: v_mov_b32_e32 v2, s2
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT: buffer_store_dword v0, v2, s[8:11], 0 offen
; G_GFX10-NEXT: buffer_store_dword v1, v2, s[8:11], 0 offen offset:4
@@ -1143,23 +1143,23 @@ define amdgpu_kernel void @lds_ds_fmax_f64(double addrspace(5)* %out, double add
; G_GFX7: ; %bb.0:
; G_GFX7-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; G_GFX7-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
-; G_GFX7-NEXT: s_load_dword s2, s[0:1], 0xb
+; G_GFX7-NEXT: s_load_dword s4, s[0:1], 0xb
; G_GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; G_GFX7-NEXT: s_mov_b32 s10, -1
; G_GFX7-NEXT: s_mov_b32 s11, 0xe8f000
; G_GFX7-NEXT: s_add_u32 s8, s8, s3
+; G_GFX7-NEXT: s_mov_b32 s2, 0
; G_GFX7-NEXT: s_addc_u32 s9, s9, 0
-; G_GFX7-NEXT: s_mov_b32 s4, 0
+; G_GFX7-NEXT: s_mov_b32 s3, 0x40450000
+; G_GFX7-NEXT: v_mov_b32_e32 v0, s2
; G_GFX7-NEXT: s_waitcnt lgkmcnt(0)
-; G_GFX7-NEXT: s_add_i32 s2, s2, 4
-; G_GFX7-NEXT: s_mov_b32 s5, 0x40450000
-; G_GFX7-NEXT: v_mov_b32_e32 v0, s4
-; G_GFX7-NEXT: s_lshl_b32 s3, s2, 3
-; G_GFX7-NEXT: v_mov_b32_e32 v1, s5
-; G_GFX7-NEXT: v_mov_b32_e32 v2, s3
+; G_GFX7-NEXT: s_add_i32 s4, s4, 4
+; G_GFX7-NEXT: v_mov_b32_e32 v1, s3
+; G_GFX7-NEXT: s_lshl_b32 s2, s4, 3
+; G_GFX7-NEXT: v_mov_b32_e32 v2, s2
; G_GFX7-NEXT: s_mov_b32 m0, -1
; G_GFX7-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1]
-; G_GFX7-NEXT: s_lshl_b32 s2, s2, 4
+; G_GFX7-NEXT: s_lshl_b32 s2, s4, 4
; G_GFX7-NEXT: v_mov_b32_e32 v4, s2
; G_GFX7-NEXT: ds_max_rtn_f64 v[0:1], v4, v[0:1]
; G_GFX7-NEXT: s_waitcnt lgkmcnt(0)
@@ -1177,23 +1177,23 @@ define amdgpu_kernel void @lds_ds_fmax_f64(double addrspace(5)* %out, double add
; G_VI: ; %bb.0:
; G_VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0
; G_VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1
-; G_VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; G_VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; G_VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; G_VI-NEXT: s_mov_b32 s90, -1
; G_VI-NEXT: s_mov_b32 s91, 0xe80000
; G_VI-NEXT: s_add_u32 s88, s88, s3
+; G_VI-NEXT: s_mov_b32 s2, 0
; G_VI-NEXT: s_addc_u32 s89, s89, 0
-; G_VI-NEXT: s_mov_b32 s4, 0
+; G_VI-NEXT: s_mov_b32 s3, 0x40450000
+; G_VI-NEXT: v_mov_b32_e32 v0, s2
; G_VI-NEXT: s_waitcnt lgkmcnt(0)
-; G_VI-NEXT: s_add_i32 s2, s2, 4
-; G_VI-NEXT: s_mov_b32 s5, 0x40450000
-; G_VI-NEXT: v_mov_b32_e32 v0, s4
-; G_VI-NEXT: s_lshl_b32 s3, s2, 3
-; G_VI-NEXT: v_mov_b32_e32 v1, s5
-; G_VI-NEXT: v_mov_b32_e32 v2, s3
+; G_VI-NEXT: s_add_i32 s4, s4, 4
+; G_VI-NEXT: v_mov_b32_e32 v1, s3
+; G_VI-NEXT: s_lshl_b32 s2, s4, 3
+; G_VI-NEXT: v_mov_b32_e32 v2, s2
; G_VI-NEXT: s_mov_b32 m0, -1
; G_VI-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1]
-; G_VI-NEXT: s_lshl_b32 s2, s2, 4
+; G_VI-NEXT: s_lshl_b32 s2, s4, 4
; G_VI-NEXT: v_mov_b32_e32 v4, s2
; G_VI-NEXT: ds_max_rtn_f64 v[0:1], v4, v[0:1]
; G_VI-NEXT: s_waitcnt lgkmcnt(0)
@@ -1211,28 +1211,28 @@ define amdgpu_kernel void @lds_ds_fmax_f64(double addrspace(5)* %out, double add
; G_GFX9: ; %bb.0:
; G_GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; G_GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
-; G_GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c
-; G_GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24
; G_GFX9-NEXT: s_mov_b32 s10, -1
; G_GFX9-NEXT: s_mov_b32 s11, 0xe00000
-; G_GFX9-NEXT: s_mov_b32 s0, 0
; G_GFX9-NEXT: s_add_u32 s8, s8, s3
+; G_GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c
+; G_GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; G_GFX9-NEXT: s_mov_b32 s0, 0
+; G_GFX9-NEXT: s_addc_u32 s9, s9, 0
; G_GFX9-NEXT: s_mov_b32 s1, 0x40450000
; G_GFX9-NEXT: v_mov_b32_e32 v0, s0
-; G_GFX9-NEXT: s_addc_u32 s9, s9, 0
-; G_GFX9-NEXT: v_mov_b32_e32 v1, s1
; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; G_GFX9-NEXT: s_add_i32 s0, s2, 4
-; G_GFX9-NEXT: s_lshl_b32 s1, s0, 3
-; G_GFX9-NEXT: v_mov_b32_e32 v2, s1
+; G_GFX9-NEXT: s_add_i32 s4, s4, 4
+; G_GFX9-NEXT: v_mov_b32_e32 v1, s1
+; G_GFX9-NEXT: s_lshl_b32 s0, s4, 3
+; G_GFX9-NEXT: v_mov_b32_e32 v2, s0
; G_GFX9-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1]
-; G_GFX9-NEXT: s_lshl_b32 s0, s0, 4
+; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4
; G_GFX9-NEXT: v_mov_b32_e32 v5, s0
-; G_GFX9-NEXT: v_mov_b32_e32 v4, s7
+; G_GFX9-NEXT: v_mov_b32_e32 v4, s3
; G_GFX9-NEXT: ds_max_rtn_f64 v[0:1], v5, v[0:1]
; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX9-NEXT: ds_max_rtn_f64 v[0:1], v4, v[2:3]
-; G_GFX9-NEXT: v_mov_b32_e32 v2, s6
+; G_GFX9-NEXT: v_mov_b32_e32 v2, s2
; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX9-NEXT: buffer_store_dword v0, v2, s[8:11], 0 offen
; G_GFX9-NEXT: buffer_store_dword v1, v2, s[8:11], 0 offen offset:4
@@ -1240,31 +1240,31 @@ define amdgpu_kernel void @lds_ds_fmax_f64(double addrspace(5)* %out, double add
;
; G_GFX10-LABEL: lds_ds_fmax_f64:
; G_GFX10: ; %bb.0:
-; G_GFX10-NEXT: s_clause 0x1
-; G_GFX10-NEXT: s_load_dword s2, s[0:1], 0x2c
-; G_GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24
; G_GFX10-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; G_GFX10-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
; G_GFX10-NEXT: s_mov_b32 s10, -1
; G_GFX10-NEXT: s_mov_b32 s11, 0x31c16000
; G_GFX10-NEXT: s_add_u32 s8, s8, s3
+; G_GFX10-NEXT: s_clause 0x1
+; G_GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c
+; G_GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; G_GFX10-NEXT: s_addc_u32 s9, s9, 0
; G_GFX10-NEXT: s_mov_b32 s0, 0
; G_GFX10-NEXT: s_mov_b32 s1, 0x40450000
; G_GFX10-NEXT: v_mov_b32_e32 v0, s0
; G_GFX10-NEXT: v_mov_b32_e32 v1, s1
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; G_GFX10-NEXT: s_add_i32 s2, s2, 4
-; G_GFX10-NEXT: v_mov_b32_e32 v5, s7
-; G_GFX10-NEXT: s_lshl_b32 s3, s2, 3
-; G_GFX10-NEXT: s_lshl_b32 s0, s2, 4
-; G_GFX10-NEXT: v_mov_b32_e32 v2, s3
+; G_GFX10-NEXT: s_add_i32 s4, s4, 4
+; G_GFX10-NEXT: v_mov_b32_e32 v5, s3
+; G_GFX10-NEXT: s_lshl_b32 s5, s4, 3
+; G_GFX10-NEXT: s_lshl_b32 s0, s4, 4
+; G_GFX10-NEXT: v_mov_b32_e32 v2, s5
; G_GFX10-NEXT: v_mov_b32_e32 v4, s0
; G_GFX10-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1]
; G_GFX10-NEXT: ds_max_rtn_f64 v[0:1], v4, v[0:1]
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT: ds_max_rtn_f64 v[0:1], v5, v[2:3]
-; G_GFX10-NEXT: v_mov_b32_e32 v2, s6
+; G_GFX10-NEXT: v_mov_b32_e32 v2, s2
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT: buffer_store_dword v0, v2, s[8:11], 0 offen
; G_GFX10-NEXT: buffer_store_dword v1, v2, s[8:11], 0 offen offset:4
diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll b/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll
index ae0c8b57b0645..4c55eddcb4e71 100644
--- a/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll
+++ b/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll
@@ -13,15 +13,12 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)*
; CHECK-NEXT: %0.sub1:sreg_64 = COPY killed [[COPY1]]
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (load (<2 x s32>) from %ir.ptr, align 4, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 8, 0 :: (load (s32) from %ir.ptr + 8, addrspace 4)
- ; CHECK-NEXT: undef %16.sub0_sub1:sgpr_96_with_sub0_sub1 = COPY killed [[S_LOAD_DWORDX2_IMM]]
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY killed %16
- ; CHECK-NEXT: [[COPY2]].sub2:sgpr_96_with_sub0_sub1 = COPY undef [[S_LOAD_DWORD_IMM]]
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY2]].sub0
- ; CHECK-NEXT: $sgpr0 = COPY killed [[COPY3]]
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY killed [[COPY2]].sub1
- ; CHECK-NEXT: $sgpr1 = COPY killed [[COPY4]]
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORD_IMM]]
- ; CHECK-NEXT: $sgpr2 = COPY killed [[COPY5]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0
+ ; CHECK-NEXT: $sgpr0 = COPY killed [[COPY2]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORDX2_IMM]].sub1
+ ; CHECK-NEXT: $sgpr1 = COPY killed [[COPY3]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORD_IMM]]
+ ; CHECK-NEXT: $sgpr2 = COPY killed [[COPY4]]
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit killed $sgpr0, implicit killed $sgpr1, implicit killed $sgpr2
%load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 4
ret <3 x i32> %load
diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
index 2270e4fb406be..49d2c8329b30d 100644
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -1370,26 +1370,17 @@ TEST_F(AArch64GISelMITest, FewerElementsAnd) {
CHECK: [[IMP_DEF0:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
CHECK: [[IMP_DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
CHECK: [[VALUE0:%[0-9]+]]:_(s32), [[VALUE1:%[0-9]+]]:_(s32), [[VALUE2:%[0-9]+]]:_(s32), [[VALUE3:%[0-9]+]]:_(s32), [[VALUE4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IMP_DEF0]]:_(<5 x s32>)
- CHECK: [[IMP_DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
CHECK: [[VECTOR0:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE0]]:_(s32), [[VALUE1]]:_(s32)
CHECK: [[VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE2]]:_(s32), [[VALUE3]]:_(s32)
- CHECK: [[VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE4]]:_(s32), [[IMP_DEF2]]:_(s32)
- CHECK: [[IMP_DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
CHECK: [[VALUE5:%[0-9]+]]:_(s32), [[VALUE6:%[0-9]+]]:_(s32), [[VALUE7:%[0-9]+]]:_(s32), [[VALUE8:%[0-9]+]]:_(s32), [[VALUE9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IMP_DEF1]]:_(<5 x s32>)
- CHECK: [[IMP_DEF4:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- CHECK: [[VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE5]]:_(s32), [[VALUE6]]:_(s32)
- CHECK: [[VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE7]]:_(s32), [[VALUE8]]:_(s32)
- CHECK: [[VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE9]]:_(s32), [[IMP_DEF4]]:_(s32)
- CHECK: [[IMP_DEF5:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-
- CHECK: [[AND0:%[0-9]+]]:_(<2 x s32>) = G_AND [[VECTOR0]]:_, [[VECTOR3]]:_
- CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[VECTOR1]]:_, [[VECTOR4]]:_
- CHECK: [[AND2:%[0-9]+]]:_(<2 x s32>) = G_AND [[VECTOR2]]:_, [[VECTOR5]]:_
- CHECK: [[IMP_DEF6:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-
- CHECK: [[VECTOR6:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[AND0]]:_(<2 x s32>), [[AND1]]:_(<2 x s32>), [[AND2]]:_(<2 x s32>), [[IMP_DEF6]]:_(<2 x s32>), [[IMP_DEF6]]:_(<2 x s32>)
- CHECK: [[VECTOR7:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[AND0]]:_(<2 x s32>), [[AND1]]:_(<2 x s32>), [[AND2]]:_(<2 x s32>), [[IMP_DEF6]]:_(<2 x s32>), [[IMP_DEF6]]:_(<2 x s32>)
- CHECK: [[VECTOR8:%[0-9]+]]:_(<5 x s32>), [[VECTOR9:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[VECTOR7]]:_(<10 x s32>)
+ CHECK: [[VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE5]]:_(s32), [[VALUE6]]:_(s32)
+ CHECK: [[VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE7]]:_(s32), [[VALUE8]]:_(s32)
+ CHECK: [[AND0:%[0-9]+]]:_(<2 x s32>) = G_AND [[VECTOR0]]:_, [[VECTOR2]]:_
+ CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[VECTOR1]]:_, [[VECTOR3]]:_
+ CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[VALUE4]]:_, [[VALUE9]]:_
+ CHECK: [[AND0_E0:%[0-9]+]]:_(s32), [[AND0_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND0]]:_(<2 x s32>)
+ CHECK: [[AND1_E0:%[0-9]+]]:_(s32), [[AND1_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]]:_(<2 x s32>)
+ CHECK: [[RESULT:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[AND0_E0]]:_(s32), [[AND0_E1]]:_(s32), [[AND1_E0]]:_(s32), [[AND1_E1]]:_(s32), [[AND2]]:_(s32)
)";
// Check
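The checks above encode the new leftover behavior: splitting a <5 x s32>
G_AND to <2 x s32> yields two <2 x s32> pieces plus a bare s32 leftover,
rather than a third piece padded with undef. A minimal sketch of driving
this, reusing the fixture's B, MF and EntryMBB from this file; the local
names are illustrative:

  LLT V5S32{LLT::fixed_vector(5, 32)};
  LLT V2S32{LLT::fixed_vector(2, 32)};
  // Two undef <5 x s32> operands, matching the IMP_DEF checks above.
  auto Op0 = B.buildUndef(V5S32);
  auto Op1 = B.buildUndef(V5S32);
  auto And = B.buildAnd(V5S32, Op0, Op1);

  AInfo Info(MF->getSubtarget());
  DummyGISelObserver Observer;
  LegalizerHelper Helper(*MF, Info, Observer, B);
  B.setInsertPt(*EntryMBB, And->getIterator());
  // Unmerges both operands to s32 elements, emits two <2 x s32> G_ANDs plus
  // a plain s32 G_AND for the leftover element, and recombines everything
  // with a single <5 x s32> G_BUILD_VECTOR.
  EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
            Helper.fewerElementsVector(*And, 0, V2S32));

Note that no undef lanes reach the final G_BUILD_VECTOR; the fifth elements
are combined by a genuine s32 operation.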
@@ -1428,12 +1419,19 @@ TEST_F(AArch64GISelMITest, MoreElementsAnd) {
auto CheckStr = R"(
CHECK: [[BITCAST0:%[0-9]+]]:_(<2 x s32>) = G_BITCAST
CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST
- CHECK: [[IMP_DEF0:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- CHECK: [[CONCAT0:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[BITCAST0]]:_(<2 x s32>), [[IMP_DEF0]]:_(<2 x s32>), [[IMP_DEF0]]:_(<2 x s32>)
- CHECK: [[IMP_DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- CHECK: [[CONCAT1:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[BITCAST1]]:_(<2 x s32>), [[IMP_DEF1]]:_(<2 x s32>), [[IMP_DEF1]]:_(<2 x s32>)
- CHECK: [[AND:%[0-9]+]]:_(<6 x s32>) = G_AND [[CONCAT0]]:_, [[CONCAT1]]:_
- CHECK: (<2 x s32>) = G_UNMERGE_VALUES [[AND]]:_(<6 x s32>)
+
+ CHECK: [[BITCAST0_E0:%[0-9]+]]:_(s32), [[BITCAST0_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST0]]:_(<2 x s32>)
+ CHECK: [[IMP_DEF0:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ CHECK: [[BITCAST0_LARGE:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[BITCAST0_E0]]:_(s32), [[BITCAST0_E1]]:_(s32), [[IMP_DEF0]]:_(s32), [[IMP_DEF0]]:_(s32), [[IMP_DEF0]]:_(s32), [[IMP_DEF0]]:_(s32)
+
+ CHECK: [[BITCAST1_E0:%[0-9]+]]:_(s32), [[BITCAST1_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]]:_(<2 x s32>)
+ CHECK: [[IMP_DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ CHECK: [[BITCAST1_LARGE:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[BITCAST1_E0]]:_(s32), [[BITCAST1_E1]]:_(s32), [[IMP_DEF1]]:_(s32), [[IMP_DEF1]]:_(s32), [[IMP_DEF1]]:_(s32), [[IMP_DEF1]]:_(s32)
+
+ CHECK: [[AND:%[0-9]+]]:_(<6 x s32>) = G_AND [[BITCAST0_LARGE]]:_, [[BITCAST1_LARGE]]:_
+
+ CHECK: [[AND_E0:%[0-9]+]]:_(s32), [[AND_E1:%[0-9]+]]:_(s32), [[AND_E2:%[0-9]+]]:_(s32), [[AND_E3:%[0-9]+]]:_(s32), [[AND_E4:%[0-9]+]]:_(s32), [[AND_E5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]]:_(<6 x s32>)
+ CHECK: (<2 x s32>) = G_BUILD_VECTOR [[AND_E0]]:_(s32), [[AND_E1]]:_(s32)
)";
EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
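The MoreElementsAnd checks show the widening counterpart: each source is
unmerged to scalars and rebuilt with a single shared G_IMPLICIT_DEF in the
trailing lanes, and the wide result is trimmed back the same element-wise
way. A sketch of that shape using the MachineIRBuilder helpers this patch
adds; the wrapper function is hypothetical and only for illustration:

  // Pads two <2 x s32> values to <6 x s32>, performs the wide G_AND, and
  // trims the result back to <2 x s32>.
  static Register widenForAnd(MachineIRBuilder &B, Register Src0,
                              Register Src1) {
    const LLT V2S32 = LLT::fixed_vector(2, 32);
    const LLT V6S32 = LLT::fixed_vector(6, 32);
    // Emits G_UNMERGE_VALUES to s32 elements, then a G_BUILD_VECTOR whose
    // four trailing lanes share one G_IMPLICIT_DEF.
    auto Wide0 = B.buildPadVectorWithUndefElements(V6S32, Src0);
    auto Wide1 = B.buildPadVectorWithUndefElements(V6S32, Src1);
    auto WideAnd = B.buildAnd(V6S32, Wide0, Wide1);
    // Emits G_UNMERGE_VALUES on the wide result and a G_BUILD_VECTOR of the
    // two leading elements, as in the final check line above.
    return B.buildDeleteTrailingVectorElements(V2S32, WideAnd).getReg(0);
  }

Because both directions go through individual elements, later unmerges line
up with build_vector operands instead of an opaque G_CONCAT_VECTORS.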
@@ -1509,26 +1507,26 @@ TEST_F(AArch64GISelMITest, FewerElementsPhi) {
auto CheckStr = R"(
CHECK: [[INITVAL:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
- CHECK: [[EXTRACT0:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[INITVAL]]:_(<5 x s32>), 0
- CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[INITVAL]]:_(<5 x s32>), 64
- CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[INITVAL]]:_(<5 x s32>), 128
+ CHECK: [[INITVAL_E0:%[0-9]+]]:_(s32), [[INITVAL_E1:%[0-9]+]]:_(s32), [[INITVAL_E2:%[0-9]+]]:_(s32), [[INITVAL_E3:%[0-9]+]]:_(s32), [[INITVAL_E4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INITVAL]]:_(<5 x s32>)
+ CHECK: [[INITVAL_E01:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INITVAL_E0]]:_(s32), [[INITVAL_E1]]:_(s32)
+ CHECK: [[INITVAL_E23:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INITVAL_E2]]:_(s32), [[INITVAL_E3]]:_(s32)
CHECK: G_BRCOND
CHECK: [[MIDVAL:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
- CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[MIDVAL]]:_(<5 x s32>), 0
- CHECK: [[EXTRACT4:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[MIDVAL]]:_(<5 x s32>), 64
- CHECK: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[MIDVAL]]:_(<5 x s32>), 128
+ CHECK: [[MIDVAL_E0:%[0-9]+]]:_(s32), [[MIDVAL_E1:%[0-9]+]]:_(s32), [[MIDVAL_E2:%[0-9]+]]:_(s32), [[MIDVAL_E3:%[0-9]+]]:_(s32), [[MIDVAL_E4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MIDVAL]]:_(<5 x s32>)
+ CHECK: [[MIDVAL_E01:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MIDVAL_E0]]:_(s32), [[MIDVAL_E1]]:_(s32)
+ CHECK: [[MIDVAL_E23:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MIDVAL_E2]]:_(s32), [[MIDVAL_E3]]:_(s32)
CHECK: G_BR
- CHECK: [[PHI0:%[0-9]+]]:_(<2 x s32>) = G_PHI [[EXTRACT0]]:_(<2 x s32>), %bb.0, [[EXTRACT3]]:_(<2 x s32>), %bb.1
- CHECK: [[PHI1:%[0-9]+]]:_(<2 x s32>) = G_PHI [[EXTRACT1]]:_(<2 x s32>), %bb.0, [[EXTRACT4]]:_(<2 x s32>), %bb.1
- CHECK: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[EXTRACT2]]:_(s32), %bb.0, [[EXTRACT5]]:_(s32), %bb.1
-
- CHECK: [[OTHER_PHI:%[0-9]+]]:_(s64) = G_PHI
-
+ CHECK: [[PHI0:%[0-9]+]]:_(<2 x s32>) = G_PHI [[INITVAL_E01]]:_(<2 x s32>), %bb.0, [[MIDVAL_E01]]:_(<2 x s32>), %bb.1
+ CHECK: [[PHI1:%[0-9]+]]:_(<2 x s32>) = G_PHI [[INITVAL_E23]]:_(<2 x s32>), %bb.0, [[MIDVAL_E23]]:_(<2 x s32>), %bb.1
+ CHECK: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[INITVAL_E4]]:_(s32), %bb.0, [[MIDVAL_E4]]:_(s32), %bb.1
CHECK: [[UNMERGE0:%[0-9]+]]:_(s32), [[UNMERGE1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PHI0]]:_(<2 x s32>)
CHECK: [[UNMERGE2:%[0-9]+]]:_(s32), [[UNMERGE3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PHI1]]:_(<2 x s32>)
CHECK: [[BV:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UNMERGE0]]:_(s32), [[UNMERGE1]]:_(s32), [[UNMERGE2]]:_(s32), [[UNMERGE3]]:_(s32), [[PHI2]]:_(s32)
+
+ CHECK: [[OTHER_PHI:%[0-9]+]]:_(s64) = G_PHI
+
CHECK: [[USE_OP:%[0-9]+]]:_(<5 x s32>) = G_AND [[BV]]:_, [[BV]]:_
)";
@@ -3196,14 +3194,8 @@ TEST_F(AArch64GISelMITest, LowerInsert) {
CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]]:_, [[SHL]]:_
CHECK: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[OR]]
- CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[V2S32]]
- CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[S32]]
- CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT
- CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]]:_, [[C]]:_(s64)
- CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT
- CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[BITCAST]]:_, [[C]]:_
- CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]]:_, [[SHL]]:_
- CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[OR]]
+ CHECK: [[V2S32_E0:%[0-9]+]]:_(s32), [[V2S32_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[V2S32]]
+ CHECK: [[BV:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[V2S32_E0]]:_(s32), [[S32]]:_(s32)
)";
// Check
@@ -3711,20 +3703,22 @@ TEST_F(AArch64GISelMITest, NarrowFreeze) {
DefineLegalizerInfo(A, {});
// Make sure that G_FREEZE is narrowed using unmerge/extract
- LLT S16{LLT::scalar(16)};
LLT S32{LLT::scalar(32)};
LLT S33{LLT::scalar(33)};
+ LLT S48{LLT::scalar(48)};
LLT S64{LLT::scalar(64)};
LLT V2S16{LLT::fixed_vector(2, 16)};
- LLT V2S32{LLT::fixed_vector(2, 32)};
+ LLT V3S16{LLT::fixed_vector(3, 16)};
+ LLT V4S16{LLT::fixed_vector(4, 16)};
auto Trunc = B.buildTrunc(S33, {Copies[0]});
- auto Vector = B.buildBitcast(V2S32, Copies[0]);
+ auto Trunc1 = B.buildTrunc(S48, {Copies[0]});
+ auto Vector = B.buildBitcast(V3S16, Trunc1);
auto FreezeScalar = B.buildInstr(TargetOpcode::G_FREEZE, {S64}, {Copies[0]});
auto FreezeOdd = B.buildInstr(TargetOpcode::G_FREEZE, {S33}, {Trunc});
- auto FreezeVector = B.buildInstr(TargetOpcode::G_FREEZE, {V2S32}, {Vector});
- auto FreezeVector1 = B.buildInstr(TargetOpcode::G_FREEZE, {V2S32}, {Vector});
+ auto FreezeVector = B.buildInstr(TargetOpcode::G_FREEZE, {V3S16}, {Vector});
+ auto FreezeVector1 = B.buildInstr(TargetOpcode::G_FREEZE, {V3S16}, {Vector});
AInfo Info(MF->getSubtarget());
DummyGISelObserver Observer;
@@ -3736,54 +3730,48 @@ TEST_F(AArch64GISelMITest, NarrowFreeze) {
EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
Helper.narrowScalar(*FreezeScalar, 0, S32));
+ // This should be followed by narrowScalar to S32.
B.setInsertPt(*EntryMBB, FreezeOdd->getIterator());
EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
- Helper.narrowScalar(*FreezeOdd, 0, S32));
+ Helper.widenScalar(*FreezeOdd, 0, S64));
B.setInsertPt(*EntryMBB, FreezeVector->getIterator());
EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
- Helper.narrowScalar(*FreezeVector, 0, V2S16));
+ Helper.fewerElementsVector(*FreezeVector, 0, V2S16));
+ // This should be followed by fewerElements to V2S16.
B.setInsertPt(*EntryMBB, FreezeVector1->getIterator());
EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
- Helper.narrowScalar(*FreezeVector1, 0, S16));
+ Helper.moreElementsVector(*FreezeVector1, 0, V4S16));
const auto *CheckStr = R"(
CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY
CHECK: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]]
- CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]]
+ CHECK: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[COPY]]
+ CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]]
CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]]
CHECK: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[UV]]
CHECK: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[UV1]]
CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FREEZE]]:_(s32), [[FREEZE1]]
- CHECK: (s1) = G_UNMERGE_VALUES [[TRUNC]]:_(s33)
- CHECK: [[UNDEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF
- CHECK: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES
- CHECK: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES
- CHECK: [[UNDEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- CHECK: [[FREEZE2:%[0-9]+]]:_(s32) = G_FREEZE [[MV1]]
- CHECK: [[FREEZE3:%[0-9]+]]:_(s32) = G_FREEZE [[MV2]]
- CHECK: [[UNDEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- CHECK: [[MV3:%[0-9]+]]:_(s1056) = G_MERGE_VALUES [[FREEZE2]]:_(s32), [[FREEZE3]]:_(s32), [[UNDEF2]]
- CHECK: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[MV3]]
-
- CHECK: [[BITCAST1:%[0-9]+]]:_(s64) = G_BITCAST [[BITCAST]]
- CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]]
- CHECK: [[FREEZE4:%[0-9]+]]:_(s32) = G_FREEZE [[UV2]]
- CHECK: [[FREEZE5:%[0-9]+]]:_(s32) = G_FREEZE [[UV3]]
- CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FREEZE4]]:_(s32), [[FREEZE5]]:_(s32)
- CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[MV4]]
-
- CHECK: [[BITCAST3:%[0-9]+]]:_(s64) = G_BITCAST [[BITCAST]]
- CHECK: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BITCAST3]]
- CHECK: [[FREEZE6:%[0-9]+]]:_(s16) = G_FREEZE [[UV4]]
- CHECK: [[FREEZE7:%[0-9]+]]:_(s16) = G_FREEZE [[UV5]]
- CHECK: [[FREEZE8:%[0-9]+]]:_(s16) = G_FREEZE [[UV6]]
- CHECK: [[FREEZE9:%[0-9]+]]:_(s16) = G_FREEZE [[UV7]]
- CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FREEZE6]]:_(s16), [[FREEZE7]]:_(s16), [[FREEZE8]]:_(s16), [[FREEZE9]]
- CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[MV5]]
+ CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]]
+ CHECK: [[FREEZE2:%[0-9]+]]:_(s64) = G_FREEZE [[ANYEXT]]
+ CHECK: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[FREEZE2]]
+
+ CHECK: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BITCAST]]
+ CHECK: [[BV:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UV2]]:_(s16), [[UV3]]:_(s16)
+ CHECK: [[FREEZE3:%[0-9]+]]:_(<2 x s16>) = G_FREEZE [[BV]]
+ CHECK: [[FREEZE4:%[0-9]+]]:_(s16) = G_FREEZE [[UV4]]
+ CHECK: [[FREEZE3_E0:%[0-9]+]]:_(s16), [[FREEZE3_E1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FREEZE3]]
+ CHECK: [[BV1:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FREEZE3_E0]]:_(s16), [[FREEZE3_E1]]:_(s16), [[FREEZE4]]:_(s16)
+
+ CHECK: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BITCAST]]
+ CHECK: [[IMP_DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ CHECK: [[BV1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV5]]:_(s16), [[UV6]]:_(s16), [[UV7]]:_(s16), [[IMP_DEF]]:_(s16)
+ CHECK: [[FREEZE5:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[BV1]]
+ CHECK: [[FREEZE5_E0:%[0-9]+]]:_(s16), [[FREEZE5_E1:%[0-9]+]]:_(s16), [[FREEZE5_E2:%[0-9]+]]:_(s16), [[FREEZE5_E3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FREEZE5]]
+ CHECK: [[BV2:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FREEZE5_E0]]:_(s16), [[FREEZE5_E1]]:_(s16), [[FREEZE5_E2]]:_(s16)
)";
// Check
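The rewritten NarrowFreeze test routes the odd-sized s33 G_FREEZE through
widenScalar: the deleted checks unmerged the s33 into s1 pieces and rebuilt
an s1056 merge, while the new ones expect a single anyext/freeze/trunc
chain. A condensed sketch, reusing the fixture's Copies, EntryMBB and the
Helper set up above:

  LLT S33{LLT::scalar(33)};
  LLT S64{LLT::scalar(64)};
  auto Odd = B.buildTrunc(S33, {Copies[0]});
  auto Freeze = B.buildInstr(TargetOpcode::G_FREEZE, {S33}, {Odd});
  B.setInsertPt(*EntryMBB, Freeze->getIterator());
  // Emits G_ANYEXT to s64, G_FREEZE on the s64, and G_TRUNC back to s33.
  EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
            Helper.widenScalar(*Freeze, 0, S64));

The widened s64 freeze is a power-of-two scalar, so the follow-up step the
in-test comment mentions, narrowScalar to S32, can split it with a plain
G_UNMERGE_VALUES instead of s1-granularity artifacts.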
@@ -3869,10 +3857,12 @@ TEST_F(AArch64GISelMITest, MoreElementsFreeze) {
const auto *CheckStr = R"(
CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY
CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]]
- CHECK: [[UNDEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- CHECK: [[CV:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BITCAST]]:_(<2 x s32>), [[UNDEF]]
- CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[CV]]
- CHECK: [[EXTR0:%[0-9]+]]:_(<2 x s32>), [[EXTR1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FREEZE]]:_(<4 x s32>)
+ CHECK: [[BITCAST_E0:%[0-9]+]]:_(s32), [[BITCAST_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]]:_(<2 x s32>)
+ CHECK: [[IMP_DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ CHECK: [[BITCAST_LARGE:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[BITCAST_E0]]:_(s32), [[BITCAST_E1]]:_(s32), [[IMP_DEF]]:_(s32), [[IMP_DEF]]:_(s32)
+ CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[BITCAST_LARGE]]
+ CHECK: [[FREEZE_E0:%[0-9]+]]:_(s32), [[FREEZE_E1:%[0-9]+]]:_(s32), [[FREEZE_E2:%[0-9]+]]:_(s32), [[FREEZE_E3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]]:_(<4 x s32>)
+ CHECK: (<2 x s32>) = G_BUILD_VECTOR [[FREEZE_E0]]:_(s32), [[FREEZE_E1]]:_(s32)
)";
// Check
@@ -4040,14 +4030,19 @@ TEST_F(AArch64GISelMITest, moreElementsShuffle) {
CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY
CHECK: [[BV1:%[0-9]+]]:_(<6 x s64>) = G_BUILD_VECTOR
CHECK: [[BV2:%[0-9]+]]:_(<6 x s64>) = G_BUILD_VECTOR
- CHECK: [[IMPDEF1:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF
- CHECK: [[INSERT1:%[0-9]+]]:_(<8 x s64>) = G_INSERT [[IMPDEF1]]:_, [[BV1]]:_(<6 x s64>), 0
- CHECK: [[IMPDEF2:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF
- CHECK: [[INSERT2:%[0-9]+]]:_(<8 x s64>) = G_INSERT [[IMPDEF2]]:_, [[BV2]]:_(<6 x s64>), 0
- CHECK: [[SHUF:%[0-9]+]]:_(<8 x s64>) = G_SHUFFLE_VECTOR [[INSERT1]]:_(<8 x s64>), [[INSERT2]]:_, shufflemask(3, 4, 9, 0, 1, 13, undef, undef)
- CHECK: [[IMPDEF3:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF
- CHECK: [[CONCAT:%[0-9]+]]:_(<24 x s64>) = G_CONCAT_VECTORS [[SHUF]]:_(<8 x s64>), [[IMPDEF3]]:_(<8 x s64>), [[IMPDEF3]]:_(<8 x s64>)
- CHECK: [[UNMERGE:%[0-9]+]]:_(<6 x s64>), [[UNMERGE2:%[0-9]+]]:_(<6 x s64>), [[UNMERGE3:%[0-9]+]]:_(<6 x s64>), [[UNMERGE4:%[0-9]+]]:_(<6 x s64>) = G_UNMERGE_VALUES [[CONCAT]]:_(<24 x s64>)
+
+ CHECK: [[BV1_E0:%[0-9]+]]:_(s64), [[BV1_E1:%[0-9]+]]:_(s64), [[BV1_E2:%[0-9]+]]:_(s64), [[BV1_E3:%[0-9]+]]:_(s64), [[BV1_E4:%[0-9]+]]:_(s64), [[BV1_E5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BV1]]:_(<6 x s64>)
+ CHECK: [[IMP_DEF0:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ CHECK: [[BV1_LARGE:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[BV1_E0]]:_(s64), [[BV1_E1]]:_(s64), [[BV1_E2]]:_(s64), [[BV1_E3]]:_(s64), [[BV1_E4]]:_(s64), [[BV1_E5]]:_(s64), [[IMP_DEF0]]:_(s64), [[IMP_DEF0]]:_(s64)
+
+ CHECK: [[BV2_E0:%[0-9]+]]:_(s64), [[BV2_E1:%[0-9]+]]:_(s64), [[BV2_E2:%[0-9]+]]:_(s64), [[BV2_E3:%[0-9]+]]:_(s64), [[BV2_E4:%[0-9]+]]:_(s64), [[BV2_E5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BV2]]:_(<6 x s64>)
+ CHECK: [[IMP_DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ CHECK: [[BV2_LARGE:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[BV2_E0]]:_(s64), [[BV2_E1]]:_(s64), [[BV2_E2]]:_(s64), [[BV2_E3]]:_(s64), [[BV2_E4]]:_(s64), [[BV2_E5]]:_(s64), [[IMP_DEF1]]:_(s64), [[IMP_DEF1]]:_(s64)
+
+ CHECK: [[SHUF:%[0-9]+]]:_(<8 x s64>) = G_SHUFFLE_VECTOR [[BV1_LARGE]]:_(<8 x s64>), [[BV2_LARGE]]:_, shufflemask(3, 4, 9, 0, 1, 13, undef, undef)
+
+ CHECK: [[SHUF_E0:%[0-9]+]]:_(s64), [[SHUF_E1:%[0-9]+]]:_(s64), [[SHUF_E2:%[0-9]+]]:_(s64), [[SHUF_E3:%[0-9]+]]:_(s64), [[SHUF_E4:%[0-9]+]]:_(s64), [[SHUF_E5:%[0-9]+]]:_(s64), [[SHUF_E6:%[0-9]+]]:_(s64), [[SHUF_E7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[SHUF]]:_(<8 x s64>)
+ CHECK: (<6 x s64>) = G_BUILD_VECTOR [[SHUF_E0]]:_(s64), [[SHUF_E1]]:_(s64), [[SHUF_E2]]:_(s64), [[SHUF_E3]]:_(s64), [[SHUF_E4]]:_(s64), [[SHUF_E5]]:_(s64)
)";
// Check
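For G_SHUFFLE_VECTOR, moreElementsVector pads both sources the same way and
extends the mask with undef entries for the added lanes, hence the
shufflemask(3, 4, 9, 0, 1, 13, undef, undef) above. A sketch of exercising
that case in the fixture's style; the single undef source stands in for the
two build_vectors the real test uses:

  LLT V6S64{LLT::fixed_vector(6, 64)};
  LLT V8S64{LLT::fixed_vector(8, 64)};
  auto Src = B.buildUndef(V6S64);
  auto Shuf = B.buildShuffleVector(V6S64, Src, Src, {3, 4, 9, 0, 1, 13});
  B.setInsertPt(*EntryMBB, Shuf->getIterator());
  // Pads both inputs to <8 x s64> with undef elements, shuffles with the
  // widened mask, then rebuilds the <6 x s64> result from the leading
  // elements of the wide shuffle.
  EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
            Helper.moreElementsVector(*Shuf, 0, V8S64));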