[llvm] [AMDGPU] Legalize 64bit elements for BUILD_VECTOR on gfx942 (PR #145052)
Janek van Oirschot via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 17 05:24:03 PDT 2025
https://github.com/JanekvO updated https://github.com/llvm/llvm-project/pull/145052
>From 4af9f87f332154c6706f4f20ec6233015d950bdc Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <Janek.vanOirschot at amd.com>
Date: Fri, 20 Jun 2025 03:24:29 -0700
Subject: [PATCH] [AMDGPU] Legalize 64bit elements for BUILD_VECTOR on gfx942
---
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 27 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 160 +-
llvm/lib/Target/AMDGPU/SIISelLowering.h | 1 +
.../test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll | 32 +-
.../CodeGen/AMDGPU/a-v-global-atomicrmw.ll | 28 +-
llvm/test/CodeGen/AMDGPU/bf16.ll | 882 +-
.../AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll | 239 +-
.../AMDGPU/buffer-atomic-fadd.f32-rtn.ll | 251 +-
.../CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll | 832 +-
.../AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll | 239 +-
.../AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll | 251 +-
.../CodeGen/AMDGPU/calling-conventions.ll | 59 +-
llvm/test/CodeGen/AMDGPU/flat-scratch.ll | 42 +-
llvm/test/CodeGen/AMDGPU/fmaximum3.ll | 2 +-
llvm/test/CodeGen/AMDGPU/fminimum3.ll | 2 +-
llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll | 6 +-
.../issue153808-extract-subvector-legalize.ll | 18 +-
.../CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll | 354 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll | 147 +-
llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll | 590 +-
llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll | 590 +-
llvm/test/CodeGen/AMDGPU/load-constant-i1.ll | 917 +-
llvm/test/CodeGen/AMDGPU/load-constant-i32.ll | 327 +-
.../CodeGen/AMDGPU/masked-load-vectortypes.ll | 75 +-
llvm/test/CodeGen/AMDGPU/maximumnum.ll | 1461 ++--
llvm/test/CodeGen/AMDGPU/mfma-loop.ll | 65 +-
llvm/test/CodeGen/AMDGPU/minimumnum.ll | 1461 ++--
llvm/test/CodeGen/AMDGPU/mul.ll | 27 +-
.../CodeGen/AMDGPU/no-fold-accvgpr-mov.ll | 18 +-
llvm/test/CodeGen/AMDGPU/packed-fp32.ll | 52 +-
llvm/test/CodeGen/AMDGPU/preload-kernargs.ll | 12 +-
.../AMDGPU/rewrite-vgpr-mfma-to-agpr.ll | 51 +-
.../test/CodeGen/AMDGPU/sgpr-to-vreg1-copy.ll | 11 +-
.../AMDGPU/shufflevector.v2i64.v2i64.ll | 234 +-
.../AMDGPU/shufflevector.v2i64.v3i64.ll | 575 +-
.../AMDGPU/shufflevector.v2i64.v4i64.ll | 1273 ++-
.../AMDGPU/shufflevector.v2i64.v8i64.ll | 4339 ++++++----
.../CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll | 234 +-
.../CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll | 575 +-
.../CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll | 1273 ++-
.../AMDGPU/shufflevector.v3i64.v2i64.ll | 663 +-
.../AMDGPU/shufflevector.v3i64.v3i64.ll | 1856 ++--
.../AMDGPU/shufflevector.v3i64.v4i64.ll | 4987 ++++++-----
.../CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll | 663 +-
.../CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll | 1856 ++--
.../CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll | 4987 ++++++-----
.../AMDGPU/shufflevector.v4i64.v2i64.ll | 2213 +++--
.../AMDGPU/shufflevector.v4i64.v3i64.ll | 2400 +++---
.../AMDGPU/shufflevector.v4i64.v4i64.ll | 7502 +++++++++--------
.../CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll | 2213 +++--
.../CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll | 2400 +++---
.../CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll | 7502 +++++++++--------
llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll | 25 +-
llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll | 32 +-
.../test/CodeGen/AMDGPU/vni8-across-blocks.ll | 5 +-
55 files changed, 33542 insertions(+), 23494 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index c2fca79979e1b..41fa6a6dad98e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -475,6 +475,8 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
EVT EltVT = VT.getVectorElementType();
SDLoc DL(N);
SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
+ unsigned NumRegs = EltVT.getSizeInBits() / 32;
+ bool IsGCN = TM.getTargetTriple().isAMDGCN();
if (NumVectorElts == 1) {
CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
@@ -482,7 +484,6 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
return;
}
- bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
if (IsGCN && Subtarget->has64BitLiterals() && VT.getSizeInBits() == 64 &&
CurDAG->isConstantValueOfAnyType(SDValue(N, 0))) {
uint64_t C = 0;
@@ -511,8 +512,10 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
}
}
- assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
- "supported yet");
+ assert(NumVectorElts <= 32 &&
+ "Vectors with more than 32 elements are not supported yet");
+ assert((IsGCN || (!IsGCN && NumRegs == 1)) &&
+ "R600 does not support 64-bit reg_seq elements");
// 32 = Max Num Vector Elements
// 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
// 1 = Vector Register Class
@@ -527,8 +530,9 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
IsRegSeq = false;
break;
}
- unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
- : R600RegisterInfo::getSubRegFromChannel(i);
+ unsigned Sub =
+ IsGCN ? SIRegisterInfo::getSubRegFromChannel(i * NumRegs, NumRegs)
+ : R600RegisterInfo::getSubRegFromChannel(i);
RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
}
@@ -538,8 +542,9 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, EltVT);
for (unsigned i = NOps; i < NumVectorElts; ++i) {
- unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
- : R600RegisterInfo::getSubRegFromChannel(i);
+ unsigned Sub =
+ IsGCN ? SIRegisterInfo::getSubRegFromChannel(i * NumRegs, NumRegs)
+ : R600RegisterInfo::getSubRegFromChannel(i);
RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
RegSeqArgs[1 + (2 * i) + 1] =
CurDAG->getTargetConstant(Sub, DL, MVT::i32);
@@ -707,9 +712,13 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
break;
}
- assert(VT.getVectorElementType().bitsEq(MVT::i32));
+ EVT VET = VT.getVectorElementType();
+ assert((VET.bitsEq(MVT::i32) || VET.bitsEq(MVT::i64)) &&
+ "Only 32-bit and 64-bit vector elements supported");
+ unsigned EltSize = VET.getSizeInBits();
unsigned RegClassID =
- SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
+ SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * EltSize)
+ ->getID();
SelectBuildVector(N, RegClassID);
return;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 015f8fe49ebcf..452bbd735e57a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -360,9 +360,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// Most operations are naturally 32-bit vector operations. We only support
// load and store of i64 vectors, so promote v2i64 vector operations to v4i32.
for (MVT Vec64 : {MVT::v2i64, MVT::v2f64}) {
- setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
- AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v4i32);
-
+ if (!STI.hasMovB64()) {
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v4i32);
+ }
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v4i32);
@@ -374,9 +375,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
}
for (MVT Vec64 : {MVT::v3i64, MVT::v3f64}) {
- setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
- AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v6i32);
-
+ if (!STI.hasMovB64()) {
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v6i32);
+ }
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v6i32);
@@ -388,9 +390,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
}
for (MVT Vec64 : {MVT::v4i64, MVT::v4f64}) {
- setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
- AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v8i32);
-
+ if (!STI.hasMovB64()) {
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v8i32);
+ }
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v8i32);
@@ -402,9 +405,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
}
for (MVT Vec64 : {MVT::v8i64, MVT::v8f64}) {
- setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
- AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v16i32);
-
+ if (!STI.hasMovB64()) {
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v16i32);
+ }
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v16i32);
@@ -416,9 +420,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
}
for (MVT Vec64 : {MVT::v16i64, MVT::v16f64}) {
- setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
- AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v32i32);
-
+ if (!STI.hasMovB64()) {
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v32i32);
+ }
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v32i32);
@@ -977,6 +982,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTargetDAGCombine({ISD::ADD,
ISD::PTRADD,
+ ISD::BUILD_VECTOR,
ISD::UADDO_CARRY,
ISD::SUB,
ISD::USUBO_CARRY,
@@ -15103,6 +15109,18 @@ bool SITargetLowering::shouldExpandVectorDynExt(SDNode *N) const {
EltSize, NumElem, Idx->isDivergent(), getSubtarget());
}
+// Rescales element index Idx of a vector of type From to an element index of
+// the vector type To. Both types must be vectors with the same total size in
+// bits. The result is Idx * NumElts(To) / NumElts(From), using unsigned
+// integer division (rounds down).
+static unsigned getMappedVectorIndex(unsigned Idx, EVT From, EVT To) {
+  assert(From.isVector() && To.isVector() &&
+         "Expected From and To types to be vector types.");
+  assert(From.getSizeInBits() == To.getSizeInBits() &&
+         "Expected From and To vector types require to have the same size.");
+
+  const unsigned SrcElts = From.getVectorNumElements();
+  const unsigned DstElts = To.getVectorNumElements();
+  const unsigned ScaledIdx = Idx * DstElts;
+
+  return ScaledIdx / SrcElts;
+}
+
SDValue
SITargetLowering::performExtractVectorEltCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
@@ -15186,6 +15204,27 @@ SITargetLowering::performExtractVectorEltCombine(SDNode *N,
}
}
+ // if PeekThroughBitcast(Vec)[MapIdx(CIdx)] == undef &&
+ // VecEltSize < PeekThroughEltSize, then
+ // EXTRACT_VECTOR_ELT(bitcast(build_vector(..., undef, ...)), CIdx) => undef
+ auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ SDValue PeekThroughVec = peekThroughBitcasts(Vec);
+ EVT PeekThroughVecVT = PeekThroughVec.getValueType();
+ if (IndexC && PeekThroughVec.getOpcode() == ISD::BUILD_VECTOR &&
+ PeekThroughVecVT.isFixedLengthVector()) {
+ EVT PeekThroughVecEltVT = PeekThroughVecVT.getVectorElementType();
+ // Only the case where the extracted vector has narrower elements than the
+ // underlying build_vector (e.g., v4i32 bitcast(v2i64)) is covered for now,
+ // which may be conservative.
+ if (VecEltSize < PeekThroughVecEltVT.getSizeInBits()) {
+ unsigned IndexVal = IndexC->getZExtValue();
+ unsigned MappedIndexVal =
+ getMappedVectorIndex(IndexVal, VecVT, PeekThroughVecVT);
+ SDValue PeekThroughElt = PeekThroughVec.getOperand(MappedIndexVal);
+ if (PeekThroughElt.isUndef())
+ return DAG.getNode(PeekThroughElt.getOpcode(), SDLoc(), VecEltVT);
+ }
+ }
+
// EXTRACT_VECTOR_ELT (<n x e>, var-idx) => n x select (e, const-idx)
if (shouldExpandVectorDynExt(N)) {
SDLoc SL(N);
@@ -16792,6 +16831,95 @@ SDValue SITargetLowering::performSelectCombine(SDNode *N,
SelectLHS, SelectRHS);
}
+// Combine a BUILD_VECTOR of constants whose elements are narrower than 64 bits
+// into 64-bit integer constants, bitcast back to the original vector type.
+//
+// Consecutive operands are packed, lowest operand in the lowest bits, into
+// 64-bit immediates. A single immediate is emitted as an i64 constant; several
+// are emitted as a BUILD_VECTOR with i64 elements.
+//
+// Returns an empty SDValue (no change) when:
+//  - the combine runs before AfterLegalizeDAG or the subtarget lacks
+//    hasMovB64(),
+//  - the vector size is not a multiple of 64 bits,
+//  - the elements are already 64 bits wide or cannot be packed evenly into
+//    64-bit chunks,
+//  - any operand is not a constant (or a fully defined constant build_vector
+//    behind bitcasts), or
+//  - any packed 64-bit value does not fit in an unsigned 32-bit integer.
+SDValue
+SITargetLowering::performBuildVectorCombine(SDNode *N,
+                                            DAGCombinerInfo &DCI) const {
+  // TODO: legalize for all targets instead of just v_mov_b64 enabled ones,
+  // legalizing could still enable s_mov_b64 which is supported on all targets.
+  const GCNSubtarget *ST = getSubtarget();
+  if (DCI.Level < AfterLegalizeDAG || !ST->hasMovB64())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc SL(N);
+
+  EVT VT = N->getValueType(0);
+  EVT EltVT = VT.getVectorElementType();
+  unsigned SizeBits = VT.getSizeInBits();
+  unsigned EltSize = EltVT.getSizeInBits();
+
+  // Skip if:
+  // - Value type isn't multiple of 64 bit (e.g., v3i32), or
+  // - Element type has already been combined into 64b elements, or
+  // - Element size does not evenly divide 64 bits. The packing loop below
+  //   shifts by the accumulated element size, so such sizes would never land
+  //   exactly on 64 and would eventually shift a 64-bit value by >= 64 bits,
+  //   which is undefined behaviour.
+  if ((SizeBits % 64) != 0 || EltVT == MVT::i64 || EltVT == MVT::f64 ||
+      EltSize >= 64 || (64 % EltSize) != 0)
+    return SDValue();
+
+  // Endianness is fixed for the whole DAG; query it once.
+  const bool IsLE = DAG.getDataLayout().isLittleEndian();
+
+  // Construct the 64b values.
+  SmallVector<uint64_t, 8> ImmVals;
+  uint64_t ImmVal = 0;
+  uint64_t ImmSize = 0;
+  for (SDValue Opand : N->ops()) {
+    // Build_vector with constants only.
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Opand);
+    ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Opand);
+    BuildVectorSDNode *BV =
+        dyn_cast<BuildVectorSDNode>(peekThroughBitcasts(Opand));
+
+    if (!C && !FPC && !BV)
+      return SDValue();
+
+    uint64_t Val = 0;
+    if (BV) {
+      // The operand is itself a (bitcast) build_vector: fold it into a single
+      // EltSize-wide raw value, rejecting non-constant or partially undefined
+      // vectors.
+      if (!BV->isConstant())
+        return SDValue();
+      BitVector UndefElements;
+      SmallVector<APInt> RawBits;
+      if (!BV->getConstantRawBits(IsLE, EltSize, RawBits, UndefElements))
+        return SDValue();
+
+      assert(RawBits.size() == 1 &&
+             "BuildVector constant value retrieval expected 1 element");
+
+      if (UndefElements.any())
+        return SDValue();
+
+      Val = RawBits[0].getZExtValue();
+    } else {
+      Val = C ? C->getZExtValue()
+              : FPC->getValueAPF().bitcastToAPInt().getZExtValue();
+    }
+
+    // Append this element above the bits gathered so far. ImmSize < 64 here
+    // because EltSize divides 64 and the accumulator resets at 64.
+    ImmVal |= Val << ImmSize;
+    ImmSize += EltSize;
+    if (ImmSize == 64) {
+      // Only 64-bit values representable as an unsigned 32-bit integer are
+      // combined. NOTE(review): presumably tied to the literal encoding of
+      // the 64-bit move - confirm.
+      if (!isUInt<32>(ImmVal))
+        return SDValue();
+      ImmVals.push_back(ImmVal);
+      ImmVal = 0;
+      ImmSize = 0;
+    }
+  }
+
+  // Avoid emitting build_vector with 1 element and directly emit value.
+  if (ImmVals.size() == 1) {
+    SDValue Val = DAG.getConstant(ImmVals[0], SL, MVT::i64);
+    return DAG.getBitcast(VT, Val);
+  }
+
+  // Construct and return build_vector with 64b elements.
+  if (!ImmVals.empty()) {
+    SmallVector<SDValue, 8> VectorConsts(ImmVals.size());
+    for (unsigned i = 0; i < ImmVals.size(); ++i)
+      VectorConsts[i] = DAG.getConstant(ImmVals[i], SL, MVT::i64);
+    unsigned NewNumElts = SizeBits / 64;
+    LLVMContext &Ctx = *DAG.getContext();
+    EVT NewVT = EVT::getVectorVT(Ctx, MVT::i64, NewNumElts);
+    SDValue BV = DAG.getBuildVector(NewVT, SL, VectorConsts);
+    return DAG.getBitcast(VT, BV);
+  }
+  return SDValue();
+}
+
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
@@ -16885,6 +17013,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performFCanonicalizeCombine(N, DCI);
case AMDGPUISD::RCP:
return performRcpCombine(N, DCI);
+ case ISD::BUILD_VECTOR:
+ return performBuildVectorCombine(N, DCI);
case ISD::FLDEXP:
case AMDGPUISD::FRACT:
case AMDGPUISD::RSQ:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index ba408a8f64540..326c27cbc6e9f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -245,6 +245,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performBuildVectorCombine(SDNode *N, DAGCombinerInfo &DCI) const;
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
diff --git a/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll
index d053425afbb6d..51731b1d4dcdf 100644
--- a/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll
+++ b/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll
@@ -10257,48 +10257,48 @@ define void @flat_atomic_fsub_f64_ret_av_av(ptr %ptr) #0 {
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: s_mov_b64 s[2:3], 0x50
; GFX950-NEXT: s_mov_b64 s[0:1], src_private_base
-; GFX950-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3]
-; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
+; GFX950-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, s[2:3]
+; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v3
; GFX950-NEXT: ;;#ASMSTART
-; GFX950-NEXT: ; def v[2:3]
+; GFX950-NEXT: ; def v[4:5]
; GFX950-NEXT: ;;#ASMEND
-; GFX950-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX950-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX950-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX950-NEXT: s_cbranch_execz .LBB130_4
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
-; GFX950-NEXT: flat_load_dwordx2 v[4:5], v[0:1]
+; GFX950-NEXT: flat_load_dwordx2 v[0:1], v[2:3]
; GFX950-NEXT: s_mov_b64 s[2:3], 0
; GFX950-NEXT: .LBB130_2: ; %atomicrmw.start
; GFX950-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
-; GFX950-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3]
-; GFX950-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0
+; GFX950-NEXT: v_mov_b64_e32 v[8:9], v[0:1]
+; GFX950-NEXT: v_add_f64 v[6:7], v[8:9], -v[4:5]
+; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] sc0
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
+; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
; GFX950-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
; GFX950-NEXT: s_andn2_b64 exec, exec, s[2:3]
; GFX950-NEXT: s_cbranch_execnz .LBB130_2
; GFX950-NEXT: ; %bb.3: ; %Flow
; GFX950-NEXT: s_or_b64 exec, exec, s[2:3]
-; GFX950-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX950-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX950-NEXT: .LBB130_4: ; %Flow3
; GFX950-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
; GFX950-NEXT: s_cbranch_execz .LBB130_6
; GFX950-NEXT: ; %bb.5: ; %atomicrmw.private
-; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e32 v6, -1, v0, vcc
-; GFX950-NEXT: scratch_load_dwordx2 v[4:5], v6, off
+; GFX950-NEXT: v_cndmask_b32_e32 v6, -1, v2, vcc
+; GFX950-NEXT: scratch_load_dwordx2 v[0:1], v6, off
; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_add_f64 v[0:1], v[4:5], -v[2:3]
-; GFX950-NEXT: scratch_store_dwordx2 v6, v[0:1], off
+; GFX950-NEXT: v_add_f64 v[2:3], v[0:1], -v[4:5]
+; GFX950-NEXT: scratch_store_dwordx2 v6, v[2:3], off
; GFX950-NEXT: .LBB130_6: ; %atomicrmw.phi
; GFX950-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX950-NEXT: ;;#ASMSTART
-; GFX950-NEXT: ; use v[4:5]
+; GFX950-NEXT: ; use v[0:1]
; GFX950-NEXT: ;;#ASMEND
; GFX950-NEXT: s_waitcnt vmcnt(0)
; GFX950-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll
index c98fff96d7b8a..da976d72b837e 100644
--- a/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll
+++ b/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll
@@ -6823,26 +6823,26 @@ define void @global_atomic_fsub_f64_ret_av_av(ptr addrspace(1) %ptr) #0 {
; GFX950-LABEL: global_atomic_fsub_f64_ret_av_av:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: global_load_dwordx2 v[4:5], v[0:1], off offset:80
+; GFX950-NEXT: global_load_dwordx2 v[2:3], v[0:1], off offset:80
; GFX950-NEXT: s_mov_b64 s[0:1], 0
; GFX950-NEXT: ;;#ASMSTART
-; GFX950-NEXT: ; def v[2:3]
+; GFX950-NEXT: ; def v[4:5]
; GFX950-NEXT: ;;#ASMEND
; GFX950-NEXT: .LBB130_1: ; %atomicrmw.start
; GFX950-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
-; GFX950-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3]
-; GFX950-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:80 sc0
+; GFX950-NEXT: v_mov_b64_e32 v[8:9], v[2:3]
+; GFX950-NEXT: v_add_f64 v[6:7], v[8:9], -v[4:5]
+; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9], off offset:80 sc0
; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
+; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
; GFX950-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX950-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX950-NEXT: s_cbranch_execnz .LBB130_1
; GFX950-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX950-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX950-NEXT: ;;#ASMSTART
-; GFX950-NEXT: ; use v[4:5]
+; GFX950-NEXT: ; use v[2:3]
; GFX950-NEXT: ;;#ASMEND
; GFX950-NEXT: s_setpc_b64 s[30:31]
%gep.0 = getelementptr inbounds [512 x double], ptr addrspace(1) %ptr, i64 0, i64 10
@@ -12626,26 +12626,26 @@ define void @global_atomic_fsub_f64_saddr_ret_av_av(ptr addrspace(1) inreg %ptr)
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_mov_b32_e32 v4, 0
-; GFX950-NEXT: global_load_dwordx2 v[2:3], v4, s[0:1] offset:80
+; GFX950-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1] offset:80
; GFX950-NEXT: s_mov_b64 s[2:3], 0
; GFX950-NEXT: ;;#ASMSTART
-; GFX950-NEXT: ; def v[0:1]
+; GFX950-NEXT: ; def v[2:3]
; GFX950-NEXT: ;;#ASMEND
; GFX950-NEXT: .LBB238_1: ; %atomicrmw.start
; GFX950-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_mov_b64_e32 v[8:9], v[2:3]
-; GFX950-NEXT: v_add_f64 v[6:7], v[8:9], -v[0:1]
-; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v4, v[6:9], s[0:1] offset:80 sc0
+; GFX950-NEXT: v_mov_b64_e32 v[8:9], v[0:1]
+; GFX950-NEXT: v_add_f64 v[6:7], v[8:9], -v[2:3]
+; GFX950-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[6:9], s[0:1] offset:80 sc0
; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
; GFX950-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
; GFX950-NEXT: s_andn2_b64 exec, exec, s[2:3]
; GFX950-NEXT: s_cbranch_execnz .LBB238_1
; GFX950-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX950-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX950-NEXT: ;;#ASMSTART
-; GFX950-NEXT: ; use v[2:3]
+; GFX950-NEXT: ; use v[0:1]
; GFX950-NEXT: ;;#ASMEND
; GFX950-NEXT: s_setpc_b64 s[30:31]
%gep.0 = getelementptr inbounds [512 x double], ptr addrspace(1) %ptr, i64 0, i64 10
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index 44c719f3635c8..8357ffab5e5da 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -6952,50 +6952,95 @@ define <32 x float> @global_extload_v32bf16_to_v32f32(ptr addrspace(1) %ptr) {
; GFX8-NEXT: v_and_b32_e32 v31, 0xffff0000, v31
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: global_extload_v32bf16_to_v32f32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
-; GFX9-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16
-; GFX9-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:32
-; GFX9-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:48
-; GFX9-NEXT: s_waitcnt vmcnt(3)
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v4
-; GFX9-NEXT: v_and_b32_e32 v1, 0xffff0000, v4
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v5
-; GFX9-NEXT: v_and_b32_e32 v3, 0xffff0000, v5
-; GFX9-NEXT: v_lshlrev_b32_e32 v4, 16, v6
-; GFX9-NEXT: v_and_b32_e32 v5, 0xffff0000, v6
-; GFX9-NEXT: v_lshlrev_b32_e32 v6, 16, v7
-; GFX9-NEXT: v_and_b32_e32 v7, 0xffff0000, v7
-; GFX9-NEXT: s_waitcnt vmcnt(2)
-; GFX9-NEXT: v_lshlrev_b32_e32 v8, 16, v12
-; GFX9-NEXT: v_and_b32_e32 v9, 0xffff0000, v12
-; GFX9-NEXT: v_lshlrev_b32_e32 v10, 16, v13
-; GFX9-NEXT: v_and_b32_e32 v11, 0xffff0000, v13
-; GFX9-NEXT: v_lshlrev_b32_e32 v12, 16, v14
-; GFX9-NEXT: v_and_b32_e32 v13, 0xffff0000, v14
-; GFX9-NEXT: v_lshlrev_b32_e32 v14, 16, v15
-; GFX9-NEXT: v_and_b32_e32 v15, 0xffff0000, v15
-; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: v_lshlrev_b32_e32 v16, 16, v20
-; GFX9-NEXT: v_and_b32_e32 v17, 0xffff0000, v20
-; GFX9-NEXT: v_lshlrev_b32_e32 v18, 16, v21
-; GFX9-NEXT: v_and_b32_e32 v19, 0xffff0000, v21
-; GFX9-NEXT: v_lshlrev_b32_e32 v20, 16, v22
-; GFX9-NEXT: v_and_b32_e32 v21, 0xffff0000, v22
-; GFX9-NEXT: v_lshlrev_b32_e32 v22, 16, v23
-; GFX9-NEXT: v_and_b32_e32 v23, 0xffff0000, v23
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_lshlrev_b32_e32 v24, 16, v28
-; GFX9-NEXT: v_and_b32_e32 v25, 0xffff0000, v28
-; GFX9-NEXT: v_lshlrev_b32_e32 v26, 16, v29
-; GFX9-NEXT: v_and_b32_e32 v27, 0xffff0000, v29
-; GFX9-NEXT: v_lshlrev_b32_e32 v28, 16, v30
-; GFX9-NEXT: v_and_b32_e32 v29, 0xffff0000, v30
-; GFX9-NEXT: v_lshlrev_b32_e32 v30, 16, v31
-; GFX9-NEXT: v_and_b32_e32 v31, 0xffff0000, v31
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: global_extload_v32bf16_to_v32f32:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX900-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16
+; GFX900-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:32
+; GFX900-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:48
+; GFX900-NEXT: s_waitcnt vmcnt(3)
+; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v4
+; GFX900-NEXT: v_and_b32_e32 v1, 0xffff0000, v4
+; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v5
+; GFX900-NEXT: v_and_b32_e32 v3, 0xffff0000, v5
+; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v6
+; GFX900-NEXT: v_and_b32_e32 v5, 0xffff0000, v6
+; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v7
+; GFX900-NEXT: v_and_b32_e32 v7, 0xffff0000, v7
+; GFX900-NEXT: s_waitcnt vmcnt(2)
+; GFX900-NEXT: v_lshlrev_b32_e32 v8, 16, v12
+; GFX900-NEXT: v_and_b32_e32 v9, 0xffff0000, v12
+; GFX900-NEXT: v_lshlrev_b32_e32 v10, 16, v13
+; GFX900-NEXT: v_and_b32_e32 v11, 0xffff0000, v13
+; GFX900-NEXT: v_lshlrev_b32_e32 v12, 16, v14
+; GFX900-NEXT: v_and_b32_e32 v13, 0xffff0000, v14
+; GFX900-NEXT: v_lshlrev_b32_e32 v14, 16, v15
+; GFX900-NEXT: v_and_b32_e32 v15, 0xffff0000, v15
+; GFX900-NEXT: s_waitcnt vmcnt(1)
+; GFX900-NEXT: v_lshlrev_b32_e32 v16, 16, v20
+; GFX900-NEXT: v_and_b32_e32 v17, 0xffff0000, v20
+; GFX900-NEXT: v_lshlrev_b32_e32 v18, 16, v21
+; GFX900-NEXT: v_and_b32_e32 v19, 0xffff0000, v21
+; GFX900-NEXT: v_lshlrev_b32_e32 v20, 16, v22
+; GFX900-NEXT: v_and_b32_e32 v21, 0xffff0000, v22
+; GFX900-NEXT: v_lshlrev_b32_e32 v22, 16, v23
+; GFX900-NEXT: v_and_b32_e32 v23, 0xffff0000, v23
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v24, 16, v28
+; GFX900-NEXT: v_and_b32_e32 v25, 0xffff0000, v28
+; GFX900-NEXT: v_lshlrev_b32_e32 v26, 16, v29
+; GFX900-NEXT: v_and_b32_e32 v27, 0xffff0000, v29
+; GFX900-NEXT: v_lshlrev_b32_e32 v28, 16, v30
+; GFX900-NEXT: v_and_b32_e32 v29, 0xffff0000, v30
+; GFX900-NEXT: v_lshlrev_b32_e32 v30, 16, v31
+; GFX900-NEXT: v_and_b32_e32 v31, 0xffff0000, v31
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: global_extload_v32bf16_to_v32f32:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:16
+; GFX950-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:32
+; GFX950-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:48
+; GFX950-NEXT: global_load_dwordx4 v[32:35], v[0:1], off
+; GFX950-NEXT: s_waitcnt vmcnt(3)
+; GFX950-NEXT: v_lshlrev_b32_e32 v8, 16, v2
+; GFX950-NEXT: v_and_b32_e32 v9, 0xffff0000, v2
+; GFX950-NEXT: v_lshlrev_b32_e32 v10, 16, v3
+; GFX950-NEXT: v_and_b32_e32 v11, 0xffff0000, v3
+; GFX950-NEXT: v_lshlrev_b32_e32 v12, 16, v4
+; GFX950-NEXT: v_and_b32_e32 v13, 0xffff0000, v4
+; GFX950-NEXT: v_lshlrev_b32_e32 v14, 16, v5
+; GFX950-NEXT: v_and_b32_e32 v15, 0xffff0000, v5
+; GFX950-NEXT: s_waitcnt vmcnt(2)
+; GFX950-NEXT: v_lshlrev_b32_e32 v16, 16, v20
+; GFX950-NEXT: v_and_b32_e32 v17, 0xffff0000, v20
+; GFX950-NEXT: v_lshlrev_b32_e32 v18, 16, v21
+; GFX950-NEXT: v_and_b32_e32 v19, 0xffff0000, v21
+; GFX950-NEXT: v_lshlrev_b32_e32 v20, 16, v22
+; GFX950-NEXT: v_and_b32_e32 v21, 0xffff0000, v22
+; GFX950-NEXT: v_lshlrev_b32_e32 v22, 16, v23
+; GFX950-NEXT: v_and_b32_e32 v23, 0xffff0000, v23
+; GFX950-NEXT: s_waitcnt vmcnt(1)
+; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v28
+; GFX950-NEXT: v_and_b32_e32 v25, 0xffff0000, v28
+; GFX950-NEXT: v_lshlrev_b32_e32 v26, 16, v29
+; GFX950-NEXT: v_and_b32_e32 v27, 0xffff0000, v29
+; GFX950-NEXT: v_lshlrev_b32_e32 v28, 16, v30
+; GFX950-NEXT: v_and_b32_e32 v29, 0xffff0000, v30
+; GFX950-NEXT: v_lshlrev_b32_e32 v30, 16, v31
+; GFX950-NEXT: v_and_b32_e32 v31, 0xffff0000, v31
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v32
+; GFX950-NEXT: v_and_b32_e32 v1, 0xffff0000, v32
+; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v33
+; GFX950-NEXT: v_and_b32_e32 v3, 0xffff0000, v33
+; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v34
+; GFX950-NEXT: v_and_b32_e32 v5, 0xffff0000, v34
+; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v35
+; GFX950-NEXT: v_and_b32_e32 v7, 0xffff0000, v35
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_extload_v32bf16_to_v32f32:
; GFX10: ; %bb.0:
@@ -7151,10 +7196,10 @@ define <2 x double> @global_extload_v2bf16_to_v2f64(ptr addrspace(1) %ptr) {
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: global_load_dword v0, v[0:1], off
; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v0
-; GFX950-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[0:1], v1
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
+; GFX950-NEXT: v_and_b32_e32 v1, 0xffff0000, v0
+; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v1
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_extload_v2bf16_to_v2f64:
@@ -7250,12 +7295,12 @@ define <3 x double> @global_extload_v3bf16_to_v3f64(ptr addrspace(1) %ptr) {
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v0
-; GFX950-NEXT: v_and_b32_e32 v3, 0xffff0000, v0
-; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v1
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[0:1], v2
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v3
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[4:5], v4
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX950-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
+; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[4:5], v1
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_extload_v3bf16_to_v3f64:
@@ -7344,20 +7389,35 @@ define <4 x double> @global_extload_v4bf16_to_v4f64(ptr addrspace(1) %ptr) {
; GFX8-NEXT: v_cvt_f64_f32_e32 v[6:7], v6
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: global_extload_v4bf16_to_v4f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v0
-; GFX9-NEXT: v_and_b32_e32 v3, 0xffff0000, v0
-; GFX9-NEXT: v_lshlrev_b32_e32 v4, 16, v1
-; GFX9-NEXT: v_and_b32_e32 v6, 0xffff0000, v1
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[0:1], v2
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[2:3], v3
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[4:5], v4
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[6:7], v6
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: global_extload_v4bf16_to_v4f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX900-NEXT: v_and_b32_e32 v3, 0xffff0000, v0
+; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v1
+; GFX900-NEXT: v_and_b32_e32 v6, 0xffff0000, v1
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[0:1], v2
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[2:3], v3
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[4:5], v4
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[6:7], v6
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: global_extload_v4bf16_to_v4f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_and_b32_e32 v2, 0xffff0000, v1
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX950-NEXT: v_and_b32_e32 v3, 0xffff0000, v0
+; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[6:7], v2
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[4:5], v1
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v3
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_extload_v4bf16_to_v4f64:
; GFX10: ; %bb.0:
@@ -7459,22 +7519,39 @@ define <5 x double> @global_extload_v5bf16_to_v5f64(ptr addrspace(1) %ptr) {
; GFX8-NEXT: v_cvt_f64_f32_e32 v[8:9], v8
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: global_extload_v5bf16_to_v5f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v0
-; GFX9-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
-; GFX9-NEXT: v_lshlrev_b32_e32 v5, 16, v1
-; GFX9-NEXT: v_and_b32_e32 v6, 0xffff0000, v1
-; GFX9-NEXT: v_lshlrev_b32_e32 v8, 16, v2
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[0:1], v3
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[2:3], v4
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[4:5], v5
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[6:7], v6
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[8:9], v8
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: global_extload_v5bf16_to_v5f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX900-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
+; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v1
+; GFX900-NEXT: v_and_b32_e32 v6, 0xffff0000, v1
+; GFX900-NEXT: v_lshlrev_b32_e32 v8, 16, v2
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[0:1], v3
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[2:3], v4
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[4:5], v5
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[6:7], v6
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[8:9], v8
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: global_extload_v5bf16_to_v5f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX950-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX950-NEXT: v_and_b32_e32 v10, 0xffff0000, v0
+; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[8:9], v2
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[6:7], v3
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[4:5], v1
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v10
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_extload_v5bf16_to_v5f64:
; GFX10: ; %bb.0:
@@ -7580,24 +7657,43 @@ define <6 x double> @global_extload_v6bf16_to_v6f64(ptr addrspace(1) %ptr) {
; GFX8-NEXT: v_cvt_f64_f32_e32 v[10:11], v10
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: global_extload_v6bf16_to_v6f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx3 v[0:2], v[0:1], off
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v0
-; GFX9-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
-; GFX9-NEXT: v_lshlrev_b32_e32 v5, 16, v1
-; GFX9-NEXT: v_and_b32_e32 v6, 0xffff0000, v1
-; GFX9-NEXT: v_lshlrev_b32_e32 v8, 16, v2
-; GFX9-NEXT: v_and_b32_e32 v10, 0xffff0000, v2
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[0:1], v3
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[2:3], v4
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[4:5], v5
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[6:7], v6
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[8:9], v8
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[10:11], v10
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: global_extload_v6bf16_to_v6f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: global_load_dwordx3 v[0:2], v[0:1], off
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v0
+; GFX900-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
+; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v1
+; GFX900-NEXT: v_and_b32_e32 v6, 0xffff0000, v1
+; GFX900-NEXT: v_lshlrev_b32_e32 v8, 16, v2
+; GFX900-NEXT: v_and_b32_e32 v10, 0xffff0000, v2
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[0:1], v3
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[2:3], v4
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[4:5], v5
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[6:7], v6
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[8:9], v8
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[10:11], v10
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: global_extload_v6bf16_to_v6f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: global_load_dwordx3 v[0:2], v[0:1], off
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_and_b32_e32 v3, 0xffff0000, v2
+; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX950-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX950-NEXT: v_and_b32_e32 v12, 0xffff0000, v0
+; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[10:11], v3
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[8:9], v2
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[6:7], v4
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[4:5], v1
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v12
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_extload_v6bf16_to_v6f64:
; GFX10: ; %bb.0:
@@ -7719,28 +7815,51 @@ define <8 x double> @global_extload_v8bf16_to_v8f64(ptr addrspace(1) %ptr) {
; GFX8-NEXT: v_cvt_f64_f32_e32 v[14:15], v14
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: global_extload_v8bf16_to_v8f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_lshlrev_b32_e32 v4, 16, v0
-; GFX9-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
-; GFX9-NEXT: v_lshlrev_b32_e32 v6, 16, v1
-; GFX9-NEXT: v_and_b32_e32 v7, 0xffff0000, v1
-; GFX9-NEXT: v_lshlrev_b32_e32 v8, 16, v2
-; GFX9-NEXT: v_and_b32_e32 v10, 0xffff0000, v2
-; GFX9-NEXT: v_lshlrev_b32_e32 v12, 16, v3
-; GFX9-NEXT: v_and_b32_e32 v14, 0xffff0000, v3
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[0:1], v4
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[2:3], v5
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[6:7], v7
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[8:9], v8
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[10:11], v10
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[12:13], v12
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[14:15], v14
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: global_extload_v8bf16_to_v8f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v0
+; GFX900-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
+; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v1
+; GFX900-NEXT: v_and_b32_e32 v7, 0xffff0000, v1
+; GFX900-NEXT: v_lshlrev_b32_e32 v8, 16, v2
+; GFX900-NEXT: v_and_b32_e32 v10, 0xffff0000, v2
+; GFX900-NEXT: v_lshlrev_b32_e32 v12, 16, v3
+; GFX900-NEXT: v_and_b32_e32 v14, 0xffff0000, v3
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[0:1], v4
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[2:3], v5
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[6:7], v7
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[8:9], v8
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[10:11], v10
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[12:13], v12
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[14:15], v14
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: global_extload_v8bf16_to_v8f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_and_b32_e32 v4, 0xffff0000, v3
+; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX950-NEXT: v_and_b32_e32 v5, 0xffff0000, v2
+; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX950-NEXT: v_and_b32_e32 v6, 0xffff0000, v1
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX950-NEXT: v_and_b32_e32 v16, 0xffff0000, v0
+; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[14:15], v4
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[12:13], v3
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[10:11], v5
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[8:9], v2
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[6:7], v6
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[4:5], v1
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v16
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_extload_v8bf16_to_v8f64:
; GFX10: ; %bb.0:
@@ -7926,46 +8045,87 @@ define <16 x double> @global_extload_v16bf16_to_v16f64(ptr addrspace(1) %ptr) {
; GFX8-NEXT: v_cvt_f64_f32_e32 v[30:31], v30
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: global_extload_v16bf16_to_v16f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off
-; GFX9-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:16
-; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v2
-; GFX9-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
-; GFX9-NEXT: v_lshlrev_b32_e32 v10, 16, v3
-; GFX9-NEXT: v_and_b32_e32 v11, 0xffff0000, v3
-; GFX9-NEXT: v_lshlrev_b32_e32 v12, 16, v4
-; GFX9-NEXT: v_and_b32_e32 v13, 0xffff0000, v4
-; GFX9-NEXT: v_lshlrev_b32_e32 v14, 16, v5
-; GFX9-NEXT: v_and_b32_e32 v15, 0xffff0000, v5
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_lshlrev_b32_e32 v16, 16, v6
-; GFX9-NEXT: v_and_b32_e32 v18, 0xffff0000, v6
-; GFX9-NEXT: v_lshlrev_b32_e32 v20, 16, v7
-; GFX9-NEXT: v_and_b32_e32 v22, 0xffff0000, v7
-; GFX9-NEXT: v_lshlrev_b32_e32 v24, 16, v8
-; GFX9-NEXT: v_and_b32_e32 v26, 0xffff0000, v8
-; GFX9-NEXT: v_lshlrev_b32_e32 v28, 16, v9
-; GFX9-NEXT: v_and_b32_e32 v30, 0xffff0000, v9
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[4:5], v10
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[6:7], v11
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[8:9], v12
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[10:11], v13
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[12:13], v14
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[14:15], v15
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[16:17], v16
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[18:19], v18
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[20:21], v20
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[22:23], v22
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[24:25], v24
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[26:27], v26
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[28:29], v28
-; GFX9-NEXT: v_cvt_f64_f32_e32 v[30:31], v30
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: global_extload_v16bf16_to_v16f64:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: global_load_dwordx4 v[2:5], v[0:1], off
+; GFX900-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:16
+; GFX900-NEXT: s_waitcnt vmcnt(1)
+; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v2
+; GFX900-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
+; GFX900-NEXT: v_lshlrev_b32_e32 v10, 16, v3
+; GFX900-NEXT: v_and_b32_e32 v11, 0xffff0000, v3
+; GFX900-NEXT: v_lshlrev_b32_e32 v12, 16, v4
+; GFX900-NEXT: v_and_b32_e32 v13, 0xffff0000, v4
+; GFX900-NEXT: v_lshlrev_b32_e32 v14, 16, v5
+; GFX900-NEXT: v_and_b32_e32 v15, 0xffff0000, v5
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_lshlrev_b32_e32 v16, 16, v6
+; GFX900-NEXT: v_and_b32_e32 v18, 0xffff0000, v6
+; GFX900-NEXT: v_lshlrev_b32_e32 v20, 16, v7
+; GFX900-NEXT: v_and_b32_e32 v22, 0xffff0000, v7
+; GFX900-NEXT: v_lshlrev_b32_e32 v24, 16, v8
+; GFX900-NEXT: v_and_b32_e32 v26, 0xffff0000, v8
+; GFX900-NEXT: v_lshlrev_b32_e32 v28, 16, v9
+; GFX900-NEXT: v_and_b32_e32 v30, 0xffff0000, v9
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[4:5], v10
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[6:7], v11
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[8:9], v12
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[10:11], v13
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[12:13], v14
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[14:15], v15
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[16:17], v16
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[18:19], v18
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[20:21], v20
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[22:23], v22
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[24:25], v24
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[26:27], v26
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[28:29], v28
+; GFX900-NEXT: v_cvt_f64_f32_e32 v[30:31], v30
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: global_extload_v16bf16_to_v16f64:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:16
+; GFX950-NEXT: global_load_dwordx4 v[6:9], v[0:1], off
+; GFX950-NEXT: s_waitcnt vmcnt(1)
+; GFX950-NEXT: v_and_b32_e32 v0, 0xffff0000, v5
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v5
+; GFX950-NEXT: v_and_b32_e32 v5, 0xffff0000, v4
+; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v4
+; GFX950-NEXT: v_and_b32_e32 v10, 0xffff0000, v3
+; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX950-NEXT: v_and_b32_e32 v11, 0xffff0000, v2
+; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_and_b32_e32 v12, 0xffff0000, v9
+; GFX950-NEXT: v_lshlrev_b32_e32 v9, 16, v9
+; GFX950-NEXT: v_and_b32_e32 v32, 0xffff0000, v8
+; GFX950-NEXT: v_lshlrev_b32_e32 v8, 16, v8
+; GFX950-NEXT: v_and_b32_e32 v33, 0xffff0000, v7
+; GFX950-NEXT: v_lshlrev_b32_e32 v34, 16, v7
+; GFX950-NEXT: v_and_b32_e32 v35, 0xffff0000, v6
+; GFX950-NEXT: v_lshlrev_b32_e32 v36, 16, v6
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[30:31], v0
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[28:29], v1
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[26:27], v5
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[24:25], v4
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[22:23], v10
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[20:21], v3
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[18:19], v11
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[16:17], v2
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[14:15], v12
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[12:13], v9
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[10:11], v32
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[8:9], v8
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[6:7], v33
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[4:5], v34
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v35
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[0:1], v36
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_extload_v16bf16_to_v16f64:
; GFX10: ; %bb.0:
@@ -9068,141 +9228,150 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) {
; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse
-; GFX950-NEXT: global_load_ushort v1, v[2:3], off offset:2
-; GFX950-NEXT: global_load_ushort v4, v[2:3], off offset:12
-; GFX950-NEXT: global_load_ushort v5, v[2:3], off offset:8
-; GFX950-NEXT: global_load_ushort v6, v[2:3], off offset:4
-; GFX950-NEXT: global_load_ushort v7, v[2:3], off
-; GFX950-NEXT: global_load_ushort v8, v[2:3], off offset:6
-; GFX950-NEXT: global_load_ushort v9, v[2:3], off offset:10
-; GFX950-NEXT: global_load_ushort v10, v[2:3], off offset:14
-; GFX950-NEXT: global_load_ushort v11, v[2:3], off offset:18
-; GFX950-NEXT: global_load_ushort v12, v[2:3], off offset:28
-; GFX950-NEXT: global_load_ushort v13, v[2:3], off offset:24
-; GFX950-NEXT: global_load_ushort v14, v[2:3], off offset:20
-; GFX950-NEXT: global_load_ushort v15, v[2:3], off offset:16
-; GFX950-NEXT: global_load_ushort v16, v[2:3], off offset:22
-; GFX950-NEXT: global_load_ushort v17, v[2:3], off offset:26
-; GFX950-NEXT: global_load_ushort v18, v[2:3], off offset:30
-; GFX950-NEXT: global_load_ushort v19, v[2:3], off offset:34
-; GFX950-NEXT: global_load_ushort v20, v[2:3], off offset:44
-; GFX950-NEXT: global_load_ushort v21, v[2:3], off offset:40
-; GFX950-NEXT: global_load_ushort v22, v[2:3], off offset:36
-; GFX950-NEXT: global_load_ushort v23, v[2:3], off offset:32
-; GFX950-NEXT: global_load_ushort v24, v[2:3], off offset:38
-; GFX950-NEXT: global_load_ushort v25, v[2:3], off offset:42
-; GFX950-NEXT: global_load_ushort v26, v[2:3], off offset:46
-; GFX950-NEXT: global_load_ushort v42, v[2:3], off offset:50
-; GFX950-NEXT: global_load_ushort v43, v[2:3], off offset:62
-; GFX950-NEXT: global_load_ushort v46, v[2:3], off offset:60
-; GFX950-NEXT: global_load_ushort v47, v[2:3], off offset:56
-; GFX950-NEXT: global_load_ushort v60, v[2:3], off offset:52
-; GFX950-NEXT: global_load_ushort v56, v[2:3], off offset:48
-; GFX950-NEXT: global_load_ushort v57, v[2:3], off offset:54
-; GFX950-NEXT: global_load_ushort v58, v[2:3], off offset:58
+; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse
+; GFX950-NEXT: global_load_ushort v1, v[2:3], off offset:30
+; GFX950-NEXT: global_load_ushort v4, v[2:3], off offset:28
+; GFX950-NEXT: global_load_ushort v5, v[2:3], off offset:26
+; GFX950-NEXT: global_load_ushort v6, v[2:3], off offset:24
+; GFX950-NEXT: global_load_ushort v7, v[2:3], off offset:22
+; GFX950-NEXT: global_load_ushort v8, v[2:3], off offset:20
+; GFX950-NEXT: global_load_ushort v9, v[2:3], off offset:18
+; GFX950-NEXT: global_load_ushort v10, v[2:3], off offset:16
+; GFX950-NEXT: global_load_ushort v11, v[2:3], off offset:14
+; GFX950-NEXT: global_load_ushort v12, v[2:3], off offset:12
+; GFX950-NEXT: global_load_ushort v13, v[2:3], off offset:10
+; GFX950-NEXT: global_load_ushort v14, v[2:3], off offset:8
+; GFX950-NEXT: global_load_ushort v15, v[2:3], off offset:6
+; GFX950-NEXT: global_load_ushort v16, v[2:3], off offset:4
+; GFX950-NEXT: global_load_ushort v17, v[2:3], off offset:2
+; GFX950-NEXT: global_load_ushort v18, v[2:3], off
+; GFX950-NEXT: global_load_ushort v19, v[2:3], off offset:62
+; GFX950-NEXT: global_load_ushort v20, v[2:3], off offset:60
+; GFX950-NEXT: global_load_ushort v21, v[2:3], off offset:58
+; GFX950-NEXT: global_load_ushort v22, v[2:3], off offset:56
+; GFX950-NEXT: global_load_ushort v23, v[2:3], off offset:54
+; GFX950-NEXT: global_load_ushort v24, v[2:3], off offset:52
+; GFX950-NEXT: global_load_ushort v25, v[2:3], off offset:50
+; GFX950-NEXT: global_load_ushort v26, v[2:3], off offset:48
+; GFX950-NEXT: global_load_ushort v42, v[2:3], off offset:46
+; GFX950-NEXT: global_load_ushort v43, v[2:3], off offset:44
+; GFX950-NEXT: global_load_ushort v46, v[2:3], off offset:42
+; GFX950-NEXT: global_load_ushort v47, v[2:3], off offset:40
+; GFX950-NEXT: global_load_ushort v56, v[2:3], off offset:38
+; GFX950-NEXT: global_load_ushort v57, v[2:3], off offset:36
+; GFX950-NEXT: global_load_ushort v60, v[2:3], off offset:34
+; GFX950-NEXT: global_load_ushort v61, v[2:3], off offset:32
; GFX950-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse
; GFX950-NEXT: s_waitcnt vmcnt(31)
; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX950-NEXT: s_waitcnt vmcnt(30)
-; GFX950-NEXT: v_lshlrev_b32_e32 v30, 16, v4
+; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v4
; GFX950-NEXT: s_waitcnt vmcnt(29)
-; GFX950-NEXT: v_lshlrev_b32_e32 v28, 16, v5
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[4:5], v1
+; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v5
+; GFX950-NEXT: s_waitcnt vmcnt(28)
+; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v6
; GFX950-NEXT: s_waitcnt vmcnt(27)
-; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v7
+; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v7
; GFX950-NEXT: s_waitcnt vmcnt(26)
-; GFX950-NEXT: v_lshlrev_b32_e32 v7, 16, v8
+; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v8
; GFX950-NEXT: s_waitcnt vmcnt(25)
-; GFX950-NEXT: v_lshlrev_b32_e32 v27, 16, v9
+; GFX950-NEXT: v_lshlrev_b32_e32 v7, 16, v9
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[32:33], v1
; GFX950-NEXT: s_waitcnt vmcnt(24)
-; GFX950-NEXT: v_lshlrev_b32_e32 v29, 16, v10
+; GFX950-NEXT: v_lshlrev_b32_e32 v8, 16, v10
; GFX950-NEXT: s_waitcnt vmcnt(23)
-; GFX950-NEXT: v_lshlrev_b32_e32 v31, 16, v11
+; GFX950-NEXT: v_lshlrev_b32_e32 v9, 16, v11
; GFX950-NEXT: s_waitcnt vmcnt(22)
-; GFX950-NEXT: v_lshlrev_b32_e32 v38, 16, v12
+; GFX950-NEXT: v_lshlrev_b32_e32 v10, 16, v12
; GFX950-NEXT: s_waitcnt vmcnt(21)
-; GFX950-NEXT: v_lshlrev_b32_e32 v36, 16, v13
+; GFX950-NEXT: v_lshlrev_b32_e32 v11, 16, v13
; GFX950-NEXT: s_waitcnt vmcnt(20)
; GFX950-NEXT: v_lshlrev_b32_e32 v34, 16, v14
-; GFX950-NEXT: s_waitcnt vmcnt(19)
-; GFX950-NEXT: v_lshlrev_b32_e32 v32, 16, v15
; GFX950-NEXT: s_waitcnt vmcnt(18)
-; GFX950-NEXT: v_lshlrev_b32_e32 v33, 16, v16
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[12:13], v27
+; GFX950-NEXT: v_lshlrev_b32_e32 v36, 16, v16
+; GFX950-NEXT: s_waitcnt vmcnt(17)
+; GFX950-NEXT: v_lshlrev_b32_e32 v37, 16, v17
; GFX950-NEXT: s_waitcnt vmcnt(16)
-; GFX950-NEXT: v_lshlrev_b32_e32 v37, 16, v18
+; GFX950-NEXT: v_lshlrev_b32_e32 v38, 16, v18
; GFX950-NEXT: s_waitcnt vmcnt(15)
; GFX950-NEXT: v_lshlrev_b32_e32 v39, 16, v19
+; GFX950-NEXT: v_lshlrev_b32_e32 v35, 16, v15
; GFX950-NEXT: s_waitcnt vmcnt(14)
-; GFX950-NEXT: v_lshlrev_b32_e32 v44, 16, v20
+; GFX950-NEXT: v_lshlrev_b32_e32 v48, 16, v20
; GFX950-NEXT: s_waitcnt vmcnt(13)
-; GFX950-NEXT: v_lshlrev_b32_e32 v40, 16, v21
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[14:15], v30
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[20:21], v31
+; GFX950-NEXT: v_lshlrev_b32_e32 v49, 16, v21
+; GFX950-NEXT: s_waitcnt vmcnt(12)
+; GFX950-NEXT: v_lshlrev_b32_e32 v52, 16, v22
+; GFX950-NEXT: s_waitcnt vmcnt(11)
+; GFX950-NEXT: v_lshlrev_b32_e32 v53, 16, v23
; GFX950-NEXT: s_waitcnt vmcnt(10)
-; GFX950-NEXT: v_lshlrev_b32_e32 v49, 16, v24
-; GFX950-NEXT: s_waitcnt vmcnt(9)
-; GFX950-NEXT: v_lshlrev_b32_e32 v53, 16, v25
+; GFX950-NEXT: v_lshlrev_b32_e32 v40, 16, v24
; GFX950-NEXT: s_waitcnt vmcnt(8)
-; GFX950-NEXT: v_lshlrev_b32_e32 v41, 16, v26
+; GFX950-NEXT: v_lshlrev_b32_e32 v44, 16, v26
; GFX950-NEXT: s_waitcnt vmcnt(7)
; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v42
-; GFX950-NEXT: s_waitcnt vmcnt(6)
-; GFX950-NEXT: v_lshlrev_b32_e32 v42, 16, v43
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[18:19], v32
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[24:25], v33
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[26:27], v36
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[32:33], v37
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[30:31], v38
+; GFX950-NEXT: v_lshlrev_b32_e32 v41, 16, v25
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[30:31], v2
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[28:29], v3
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[26:27], v4
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[24:25], v5
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[22:23], v6
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[20:21], v7
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[6:7], v36
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[4:5], v37
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v38
; GFX950-NEXT: v_cvt_f64_f32_e32 v[36:37], v39
; GFX950-NEXT: v_cvt_f64_f32_e32 v[38:39], v44
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[44:45], v42
-; GFX950-NEXT: s_waitcnt vmcnt(5)
-; GFX950-NEXT: v_lshlrev_b32_e32 v42, 16, v46
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[42:43], v42
-; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_lshlrev_b32_e32 v46, 16, v58
-; GFX950-NEXT: scratch_store_dwordx4 v0, v[42:45], off offset:240
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[58:59], v46
-; GFX950-NEXT: v_lshlrev_b32_e32 v46, 16, v47
; GFX950-NEXT: v_cvt_f64_f32_e32 v[44:45], v1
-; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v56
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[42:43], v1
-; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v57
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[56:57], v46
-; GFX950-NEXT: v_lshlrev_b32_e32 v35, 16, v17
-; GFX950-NEXT: v_lshlrev_b32_e32 v48, 16, v23
-; GFX950-NEXT: v_lshlrev_b32_e32 v52, 16, v22
-; GFX950-NEXT: scratch_store_dwordx4 v0, v[56:59], off offset:224
-; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v6
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[10:11], v28
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[58:59], v1
-; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v60
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[16:17], v29
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[22:23], v34
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[28:29], v35
+; GFX950-NEXT: s_waitcnt vmcnt(6)
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v43
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[18:19], v8
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[16:17], v9
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[14:15], v10
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[12:13], v11
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[10:11], v34
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[8:9], v35
; GFX950-NEXT: v_cvt_f64_f32_e32 v[34:35], v48
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[42:43], v1
+; GFX950-NEXT: s_waitcnt vmcnt(5)
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v46
+; GFX950-NEXT: scratch_store_dwordx4 v0, v[34:37], off offset:240
; GFX950-NEXT: v_cvt_f64_f32_e32 v[50:51], v49
; GFX950-NEXT: v_cvt_f64_f32_e32 v[48:49], v52
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[36:37], v1
+; GFX950-NEXT: s_waitcnt vmcnt(5)
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v47
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[34:35], v1
+; GFX950-NEXT: s_waitcnt vmcnt(4)
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v56
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[58:59], v1
+; GFX950-NEXT: s_waitcnt vmcnt(3)
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v57
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[56:57], v1
+; GFX950-NEXT: s_waitcnt vmcnt(2)
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v60
; GFX950-NEXT: v_cvt_f64_f32_e32 v[54:55], v53
; GFX950-NEXT: v_cvt_f64_f32_e32 v[52:53], v40
; GFX950-NEXT: v_cvt_f64_f32_e32 v[40:41], v41
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[56:57], v1
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[8:9], v7
-; GFX950-NEXT: v_cvt_f64_f32_e32 v[6:7], v6
-; GFX950-NEXT: scratch_store_dwordx4 v0, v[56:59], off offset:208
-; GFX950-NEXT: scratch_store_dwordx4 v0, v[42:45], off offset:192
-; GFX950-NEXT: scratch_store_dwordx4 v0, v[38:41], off offset:176
-; GFX950-NEXT: scratch_store_dwordx4 v0, v[52:55], off offset:160
-; GFX950-NEXT: scratch_store_dwordx4 v0, v[48:51], off offset:144
-; GFX950-NEXT: scratch_store_dwordx4 v0, v[34:37], off offset:128
+; GFX950-NEXT: scratch_store_dwordx4 v0, v[48:51], off offset:224
+; GFX950-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[50:51], v1
+; GFX950-NEXT: s_waitcnt vmcnt(2)
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v61
+; GFX950-NEXT: v_cvt_f64_f32_e32 v[48:49], v1
+; GFX950-NEXT: scratch_store_dwordx4 v0, v[52:55], off offset:208
+; GFX950-NEXT: scratch_store_dwordx4 v0, v[38:41], off offset:192
+; GFX950-NEXT: scratch_store_dwordx4 v0, v[42:45], off offset:176
+; GFX950-NEXT: scratch_store_dwordx4 v0, v[34:37], off offset:160
+; GFX950-NEXT: scratch_store_dwordx4 v0, v[56:59], off offset:144
+; GFX950-NEXT: scratch_store_dwordx4 v0, v[48:51], off offset:128
; GFX950-NEXT: scratch_store_dwordx4 v0, v[30:33], off offset:112
; GFX950-NEXT: scratch_store_dwordx4 v0, v[26:29], off offset:96
; GFX950-NEXT: scratch_store_dwordx4 v0, v[22:25], off offset:80
@@ -9211,12 +9380,11 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) {
; GFX950-NEXT: scratch_store_dwordx4 v0, v[10:13], off offset:32
; GFX950-NEXT: scratch_store_dwordx4 v0, v[6:9], off offset:16
; GFX950-NEXT: scratch_store_dwordx4 v0, v[2:5], off
-; GFX950-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse
@@ -33878,34 +34046,34 @@ define <2 x i64> @v_fptosi_v2bf16_to_v2i64(<2 x bfloat> %x) {
; GFX950-LABEL: v_fptosi_v2bf16_to_v2i64:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v0
+; GFX950-NEXT: v_and_b32_e32 v1, 0xffff0000, v0
; GFX950-NEXT: v_trunc_f32_e32 v1, v1
; GFX950-NEXT: s_mov_b32 s0, 0x2f800000
; GFX950-NEXT: v_mul_f32_e64 v2, |v1|, s0
-; GFX950-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX950-NEXT: v_floor_f32_e32 v2, v2
; GFX950-NEXT: s_mov_b32 s1, 0xcf800000
-; GFX950-NEXT: v_trunc_f32_e32 v4, v0
; GFX950-NEXT: v_fma_f32 v3, v2, s1, |v1|
-; GFX950-NEXT: v_mul_f32_e64 v0, |v4|, s0
-; GFX950-NEXT: v_cvt_u32_f32_e32 v3, v3
-; GFX950-NEXT: v_floor_f32_e32 v0, v0
; GFX950-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GFX950-NEXT: v_fma_f32 v5, v0, s1, |v4|
-; GFX950-NEXT: v_cvt_u32_f32_e32 v5, v5
+; GFX950-NEXT: v_cvt_u32_f32_e32 v3, v3
+; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX950-NEXT: v_ashrrev_i32_e32 v1, 31, v1
-; GFX950-NEXT: v_cvt_u32_f32_e32 v6, v0
-; GFX950-NEXT: v_xor_b32_e32 v3, v3, v1
-; GFX950-NEXT: v_xor_b32_e32 v2, v2, v1
-; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v3, v1
-; GFX950-NEXT: v_ashrrev_i32_e32 v3, 31, v4
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v1, vcc
-; GFX950-NEXT: v_xor_b32_e32 v2, v5, v3
-; GFX950-NEXT: v_xor_b32_e32 v4, v6, v3
-; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v3
+; GFX950-NEXT: v_trunc_f32_e32 v0, v0
+; GFX950-NEXT: v_xor_b32_e32 v4, v2, v1
+; GFX950-NEXT: v_xor_b32_e32 v2, v3, v1
+; GFX950-NEXT: v_mul_f32_e64 v3, |v0|, s0
+; GFX950-NEXT: v_floor_f32_e32 v3, v3
+; GFX950-NEXT: v_fma_f32 v5, v3, s1, |v0|
+; GFX950-NEXT: v_cvt_u32_f32_e32 v5, v5
+; GFX950-NEXT: v_cvt_u32_f32_e32 v6, v3
+; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v1, vcc
+; GFX950-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX950-NEXT: v_xor_b32_e32 v0, v5, v1
+; GFX950-NEXT: v_xor_b32_e32 v4, v6, v1
+; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v1
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v3, vcc
+; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v4, v1, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fptosi_v2bf16_to_v2i64:
@@ -34161,48 +34329,47 @@ define <3 x i64> @v_fptosi_v3bf16_to_v3i64(<3 x bfloat> %x) {
; GFX950-LABEL: v_fptosi_v3bf16_to_v3i64:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v0
-; GFX950-NEXT: v_trunc_f32_e32 v2, v2
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX950-NEXT: v_trunc_f32_e32 v1, v1
; GFX950-NEXT: s_mov_b32 s0, 0x2f800000
-; GFX950-NEXT: v_mul_f32_e64 v3, |v2|, s0
-; GFX950-NEXT: v_floor_f32_e32 v3, v3
+; GFX950-NEXT: v_mul_f32_e64 v2, |v1|, s0
+; GFX950-NEXT: v_floor_f32_e32 v2, v2
; GFX950-NEXT: s_mov_b32 s1, 0xcf800000
-; GFX950-NEXT: v_fma_f32 v4, v3, s1, |v2|
-; GFX950-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX950-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GFX950-NEXT: v_trunc_f32_e32 v5, v0
+; GFX950-NEXT: v_fma_f32 v3, v2, s1, |v1|
+; GFX950-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX950-NEXT: v_cvt_u32_f32_e32 v3, v3
-; GFX950-NEXT: v_mul_f32_e64 v0, |v5|, s0
-; GFX950-NEXT: v_floor_f32_e32 v0, v0
-; GFX950-NEXT: v_ashrrev_i32_e32 v2, 31, v2
-; GFX950-NEXT: v_fma_f32 v6, v0, s1, |v5|
-; GFX950-NEXT: v_xor_b32_e32 v4, v4, v2
-; GFX950-NEXT: v_cvt_u32_f32_e32 v7, v6
-; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX950-NEXT: v_xor_b32_e32 v3, v3, v2
-; GFX950-NEXT: v_cvt_u32_f32_e32 v8, v0
-; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v4, v2
-; GFX950-NEXT: v_trunc_f32_e32 v1, v1
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_subb_co_u32_e32 v6, vcc, v3, v2, vcc
-; GFX950-NEXT: v_ashrrev_i32_e32 v3, 31, v5
-; GFX950-NEXT: v_mul_f32_e64 v5, |v1|, s0
-; GFX950-NEXT: v_floor_f32_e32 v5, v5
-; GFX950-NEXT: v_xor_b32_e32 v2, v7, v3
-; GFX950-NEXT: v_fma_f32 v7, v5, s1, |v1|
-; GFX950-NEXT: v_cvt_u32_f32_e32 v7, v7
-; GFX950-NEXT: v_cvt_u32_f32_e32 v5, v5
-; GFX950-NEXT: v_xor_b32_e32 v4, v8, v3
-; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v3
+; GFX950-NEXT: v_trunc_f32_e32 v6, v4
+; GFX950-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX950-NEXT: v_mul_f32_e64 v4, |v6|, s0
+; GFX950-NEXT: v_floor_f32_e32 v4, v4
; GFX950-NEXT: v_ashrrev_i32_e32 v1, 31, v1
+; GFX950-NEXT: v_fma_f32 v5, v4, s1, |v6|
+; GFX950-NEXT: v_xor_b32_e32 v3, v3, v1
+; GFX950-NEXT: v_cvt_u32_f32_e32 v7, v5
+; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX950-NEXT: v_xor_b32_e32 v2, v2, v1
+; GFX950-NEXT: v_cvt_u32_f32_e32 v8, v4
+; GFX950-NEXT: v_sub_co_u32_e32 v4, vcc, v3, v1
+; GFX950-NEXT: v_trunc_f32_e32 v0, v0
; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v3, vcc
-; GFX950-NEXT: v_xor_b32_e32 v4, v7, v1
-; GFX950-NEXT: v_xor_b32_e32 v5, v5, v1
-; GFX950-NEXT: v_sub_co_u32_e32 v4, vcc, v4, v1
+; GFX950-NEXT: v_subb_co_u32_e32 v5, vcc, v2, v1, vcc
+; GFX950-NEXT: v_ashrrev_i32_e32 v1, 31, v6
+; GFX950-NEXT: v_mul_f32_e64 v6, |v0|, s0
+; GFX950-NEXT: v_floor_f32_e32 v6, v6
+; GFX950-NEXT: v_xor_b32_e32 v2, v7, v1
+; GFX950-NEXT: v_fma_f32 v7, v6, s1, |v0|
+; GFX950-NEXT: v_cvt_u32_f32_e32 v7, v7
+; GFX950-NEXT: v_cvt_u32_f32_e32 v6, v6
+; GFX950-NEXT: v_xor_b32_e32 v3, v8, v1
+; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v1, vcc
+; GFX950-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX950-NEXT: v_xor_b32_e32 v0, v7, v1
+; GFX950-NEXT: v_xor_b32_e32 v6, v6, v1
+; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v1
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v1, vcc
-; GFX950-NEXT: v_mov_b32_e32 v1, v6
+; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v6, v1, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fptosi_v3bf16_to_v3i64:
@@ -34531,60 +34698,59 @@ define <4 x i64> @v_fptosi_v4bf16_to_v4i64(<4 x bfloat> %x) {
; GFX950-LABEL: v_fptosi_v4bf16_to_v4i64:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v0
+; GFX950-NEXT: v_and_b32_e32 v2, 0xffff0000, v1
; GFX950-NEXT: v_trunc_f32_e32 v2, v2
; GFX950-NEXT: s_mov_b32 s0, 0x2f800000
; GFX950-NEXT: v_mul_f32_e64 v3, |v2|, s0
+; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX950-NEXT: v_floor_f32_e32 v3, v3
; GFX950-NEXT: s_mov_b32 s1, 0xcf800000
+; GFX950-NEXT: v_trunc_f32_e32 v1, v1
; GFX950-NEXT: v_fma_f32 v4, v3, s1, |v2|
-; GFX950-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX950-NEXT: v_mul_f32_e64 v5, |v1|, s0
; GFX950-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GFX950-NEXT: v_trunc_f32_e32 v5, v0
+; GFX950-NEXT: v_floor_f32_e32 v5, v5
; GFX950-NEXT: v_cvt_u32_f32_e32 v3, v3
-; GFX950-NEXT: v_mul_f32_e64 v0, |v5|, s0
-; GFX950-NEXT: v_floor_f32_e32 v0, v0
+; GFX950-NEXT: v_fma_f32 v6, v5, s1, |v1|
+; GFX950-NEXT: v_cvt_u32_f32_e32 v8, v6
; GFX950-NEXT: v_ashrrev_i32_e32 v2, 31, v2
-; GFX950-NEXT: v_fma_f32 v6, v0, s1, |v5|
; GFX950-NEXT: v_xor_b32_e32 v4, v4, v2
-; GFX950-NEXT: v_cvt_u32_f32_e32 v6, v6
; GFX950-NEXT: v_xor_b32_e32 v3, v3, v2
-; GFX950-NEXT: v_cvt_u32_f32_e32 v7, v0
-; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v4, v2
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_subb_co_u32_e32 v8, vcc, v3, v2, vcc
-; GFX950-NEXT: v_ashrrev_i32_e32 v3, 31, v5
-; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v1
-; GFX950-NEXT: v_trunc_f32_e32 v5, v5
-; GFX950-NEXT: v_xor_b32_e32 v2, v6, v3
-; GFX950-NEXT: v_mul_f32_e64 v6, |v5|, s0
-; GFX950-NEXT: v_floor_f32_e32 v6, v6
-; GFX950-NEXT: v_xor_b32_e32 v4, v7, v3
-; GFX950-NEXT: v_fma_f32 v7, v6, s1, |v5|
-; GFX950-NEXT: v_cvt_u32_f32_e32 v7, v7
-; GFX950-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
-; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v3
-; GFX950-NEXT: v_ashrrev_i32_e32 v5, 31, v5
-; GFX950-NEXT: v_trunc_f32_e32 v1, v1
-; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v3, vcc
-; GFX950-NEXT: v_xor_b32_e32 v4, v7, v5
-; GFX950-NEXT: v_mul_f32_e64 v7, |v1|, s0
-; GFX950-NEXT: v_floor_f32_e32 v7, v7
-; GFX950-NEXT: v_cvt_u32_f32_e32 v6, v6
-; GFX950-NEXT: v_fma_f32 v9, v7, s1, |v1|
-; GFX950-NEXT: v_cvt_u32_f32_e32 v9, v9
-; GFX950-NEXT: v_cvt_u32_f32_e32 v7, v7
-; GFX950-NEXT: v_xor_b32_e32 v6, v6, v5
-; GFX950-NEXT: v_sub_co_u32_e32 v4, vcc, v4, v5
+; GFX950-NEXT: v_cvt_u32_f32_e32 v5, v5
+; GFX950-NEXT: v_sub_co_u32_e32 v6, vcc, v4, v2
; GFX950-NEXT: v_ashrrev_i32_e32 v1, 31, v1
+; GFX950-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
+; GFX950-NEXT: v_subb_co_u32_e32 v7, vcc, v3, v2, vcc
+; GFX950-NEXT: v_xor_b32_e32 v3, v8, v1
+; GFX950-NEXT: v_trunc_f32_e32 v8, v4
+; GFX950-NEXT: v_mul_f32_e64 v4, |v8|, s0
+; GFX950-NEXT: v_floor_f32_e32 v4, v4
+; GFX950-NEXT: v_xor_b32_e32 v2, v5, v1
+; GFX950-NEXT: v_fma_f32 v5, v4, s1, |v8|
+; GFX950-NEXT: v_cvt_u32_f32_e32 v9, v5
+; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX950-NEXT: v_cvt_u32_f32_e32 v10, v4
+; GFX950-NEXT: v_sub_co_u32_e32 v4, vcc, v3, v1
+; GFX950-NEXT: v_trunc_f32_e32 v0, v0
; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_subb_co_u32_e32 v5, vcc, v6, v5, vcc
-; GFX950-NEXT: v_xor_b32_e32 v6, v9, v1
-; GFX950-NEXT: v_xor_b32_e32 v7, v7, v1
-; GFX950-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v1
+; GFX950-NEXT: v_subb_co_u32_e32 v5, vcc, v2, v1, vcc
+; GFX950-NEXT: v_ashrrev_i32_e32 v1, 31, v8
+; GFX950-NEXT: v_mul_f32_e64 v8, |v0|, s0
+; GFX950-NEXT: v_floor_f32_e32 v8, v8
+; GFX950-NEXT: v_xor_b32_e32 v2, v9, v1
+; GFX950-NEXT: v_fma_f32 v9, v8, s1, |v0|
+; GFX950-NEXT: v_cvt_u32_f32_e32 v9, v9
+; GFX950-NEXT: v_cvt_u32_f32_e32 v8, v8
+; GFX950-NEXT: v_xor_b32_e32 v3, v10, v1
+; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v1, vcc
+; GFX950-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX950-NEXT: v_xor_b32_e32 v0, v9, v1
+; GFX950-NEXT: v_xor_b32_e32 v8, v8, v1
+; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v1
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v1, vcc
-; GFX950-NEXT: v_mov_b32_e32 v1, v8
+; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v8, v1, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fptosi_v4bf16_to_v4i64:
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll
index b08e9c439a9fe..74ae44d5210e3 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s
define amdgpu_ps void @buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
@@ -167,25 +167,41 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offset_no_rtn(float %val, ptr
; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908_GFX11-NEXT: S_ENDPGM 0
;
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
+ ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
%ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
ret void
}
@@ -212,26 +228,43 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offen_no_rtn(float %val, ptr a
; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908_GFX11-NEXT: S_ENDPGM 0
;
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
%ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
@@ -258,26 +291,43 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_idxen_no_rtn(float %val, ptr a
; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908_GFX11-NEXT: S_ENDPGM 0
;
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
ret void
}
@@ -306,28 +356,47 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_bothen_no_rtn(float %val, ptr
; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908_GFX11-NEXT: S_ENDPGM 0
;
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll
index b80aa9324e616..de41cfabc5fd4 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX11 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX11 %s
define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
@@ -153,26 +153,43 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_bothen_rtn(float %val, <4 x i32>
}
define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offset_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
;
; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn
; GFX11: bb.0 (%ir-block.0):
@@ -199,27 +216,45 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offset_rtn(float %val, ptr ad
}
define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
;
; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn
; GFX11: bb.0 (%ir-block.0):
@@ -247,27 +282,45 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offen_rtn(float %val, ptr add
}
define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_idxen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
;
; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn
; GFX11: bb.0 (%ir-block.0):
@@ -295,29 +348,49 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_idxen_rtn(float %val, ptr add
}
define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_bothen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
;
; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn
; GFX11: bb.0 (%ir-block.0):
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll
index 2ce54f8a463c7..1d993b68a1ae9 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
define amdgpu_ps void @buffer_atomic_fadd_f64_offset_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offset_no_rtn
@@ -207,251 +207,655 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_bothen_rtn(double %val, <4 x i32
}
define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offset_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY7]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
+ ;
+ ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn
+ ; GFx90A: bb.0 (%ir-block.0):
+ ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFx90A-NEXT: {{ $}}
+ ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFx90A-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFx90A-NEXT: S_ENDPGM 0
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFX942-NEXT: [[COPY8:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
+ ;
+ ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn
+ ; GFx90A: bb.0 (%ir-block.0):
+ ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFx90A-NEXT: {{ $}}
+ ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFx90A-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFx90A-NEXT: S_ENDPGM 0
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_idxen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFX942-NEXT: [[COPY8:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
+ ;
+ ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn
+ ; GFx90A: bb.0 (%ir-block.0):
+ ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFx90A-NEXT: {{ $}}
+ ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFx90A-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFx90A-NEXT: S_ENDPGM 0
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_bothen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
+ ; GFX90A-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[COPY9:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY9]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
+ ;
+ ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn
+ ; GFx90A: bb.0 (%ir-block.0):
+ ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
+ ; GFx90A-NEXT: {{ $}}
+ ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
+ ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
+ ; GFx90A-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY13:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFx90A-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFx90A-NEXT: S_ENDPGM 0
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
ret void
}
define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offset_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
- ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY12]], implicit $exec
- ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
- ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
- ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY12]], implicit $exec
+ ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
+ ; GFX90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY7]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
+ ; GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY8]], implicit $exec
+ ; GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
+ ; GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY9]], implicit $exec
+ ; GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ;
+ ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn
+ ; GFx90A: bb.0 (%ir-block.0):
+ ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFx90A-NEXT: {{ $}}
+ ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFx90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
+ ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY12]], implicit $exec
+ ; GFx90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
+ ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
+ ; GFx90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFx90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFx90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
ret double %ret
}
define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
- ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
- ; GFX90A_GFX942-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
- ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
- ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
+ ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
+ ; GFX90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFX942-NEXT: [[COPY8:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
+ ; GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY9]], implicit $exec
+ ; GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
+ ; GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY10]], implicit $exec
+ ; GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ;
+ ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn
+ ; GFx90A: bb.0 (%ir-block.0):
+ ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFx90A-NEXT: {{ $}}
+ ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFx90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFx90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
+ ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
+ ; GFx90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
+ ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
+ ; GFx90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFx90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFx90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret double %ret
}
define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_idxen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
- ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
- ; GFX90A_GFX942-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
- ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
- ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
+ ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
+ ; GFX90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFX942-NEXT: [[COPY8:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
+ ; GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY9]], implicit $exec
+ ; GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
+ ; GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY10]], implicit $exec
+ ; GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ;
+ ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn
+ ; GFx90A: bb.0 (%ir-block.0):
+ ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFx90A-NEXT: {{ $}}
+ ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFx90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFx90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
+ ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
+ ; GFx90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
+ ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
+ ; GFx90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFx90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFx90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
ret double %ret
}
define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_bothen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
- ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
- ; GFX90A_GFX942-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
- ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY15]], implicit $exec
- ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
+ ; GFX90A-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
+ ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY15]], implicit $exec
+ ; GFX90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[COPY9:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY9]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
+ ; GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY10]], implicit $exec
+ ; GFX942-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
+ ; GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY11]], implicit $exec
+ ; GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ;
+ ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn
+ ; GFx90A: bb.0 (%ir-block.0):
+ ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
+ ; GFx90A-NEXT: {{ $}}
+ ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
+ ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
+ ; GFx90A-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY13:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFx90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFx90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
+ ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
+ ; GFx90A-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
+ ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY15]], implicit $exec
+ ; GFx90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFx90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFx90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret double %ret
}
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll
index c30b5549776ea..b051674a915ca 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
; GFX908-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn
@@ -165,25 +165,41 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %va
; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908-NEXT: S_ENDPGM 0
;
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
+ ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
}
@@ -210,26 +226,43 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val
; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908-NEXT: S_ENDPGM 0
;
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
@@ -256,26 +289,43 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val
; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908-NEXT: S_ENDPGM 0
;
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
ret void
}
@@ -304,28 +354,47 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %va
; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908-NEXT: S_ENDPGM 0
;
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll
index 2abd7edade8a1..9a7ba94c3c083 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offset_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offset_rtn
@@ -86,106 +86,179 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_bothen_rtn(<2 x half> %val
}
define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offset_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
%ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
ret <2 x half> %ret
}
define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
%ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret <2 x half> %ret
}
define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_idxen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
%ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
ret <2 x half> %ret
}
define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_bothen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
%ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
ret <2 x half> %ret
}
diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
index ddd3b1520bf5e..67924d1b42d56 100644
--- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
+++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
@@ -2182,32 +2182,36 @@ define amdgpu_kernel void @amd_kernel_v16i8(<16 x i8> %arg0) {
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b64_e32 v[4:5], 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: s_lshr_b32 s6, s1, 16
-; GFX1250-NEXT: s_lshr_b32 s7, s1, 24
; GFX1250-NEXT: s_lshr_b32 s8, s2, 16
; GFX1250-NEXT: s_lshr_b32 s9, s2, 24
-; GFX1250-NEXT: s_lshr_b32 s10, s3, 16
-; GFX1250-NEXT: s_lshr_b32 s11, s3, 24
+; GFX1250-NEXT: s_bfe_u32 s14, s2, 0x80008
+; GFX1250-NEXT: s_add_co_i32 s2, s2, s2
+; GFX1250-NEXT: s_add_co_i32 s14, s14, s14
+; GFX1250-NEXT: s_add_co_i32 s9, s9, s9
+; GFX1250-NEXT: s_add_co_i32 s8, s8, s8
+; GFX1250-NEXT: s_lshr_b32 s6, s1, 16
+; GFX1250-NEXT: s_lshr_b32 s7, s1, 24
+; GFX1250-NEXT: s_and_b32 s2, s2, 0xff
+; GFX1250-NEXT: s_lshl_b32 s14, s14, 8
+; GFX1250-NEXT: s_lshl_b32 s9, s9, 8
+; GFX1250-NEXT: s_and_b32 s8, s8, 0xff
; GFX1250-NEXT: s_lshr_b32 s4, s0, 16
; GFX1250-NEXT: s_lshr_b32 s5, s0, 24
+; GFX1250-NEXT: s_lshr_b32 s10, s3, 16
+; GFX1250-NEXT: s_lshr_b32 s11, s3, 24
; GFX1250-NEXT: s_bfe_u32 s12, s0, 0x80008
; GFX1250-NEXT: s_bfe_u32 s13, s1, 0x80008
-; GFX1250-NEXT: s_bfe_u32 s14, s2, 0x80008
; GFX1250-NEXT: s_bfe_u32 s15, s3, 0x80008
-; GFX1250-NEXT: s_add_co_i32 s11, s11, s11
-; GFX1250-NEXT: s_add_co_i32 s10, s10, s10
-; GFX1250-NEXT: s_add_co_i32 s9, s9, s9
-; GFX1250-NEXT: s_add_co_i32 s8, s8, s8
+; GFX1250-NEXT: s_or_b32 s2, s2, s14
+; GFX1250-NEXT: s_or_b32 s8, s8, s9
; GFX1250-NEXT: s_add_co_i32 s7, s7, s7
; GFX1250-NEXT: s_add_co_i32 s6, s6, s6
; GFX1250-NEXT: s_add_co_i32 s3, s3, s3
-; GFX1250-NEXT: s_add_co_i32 s2, s2, s2
; GFX1250-NEXT: s_add_co_i32 s15, s15, s15
-; GFX1250-NEXT: s_add_co_i32 s14, s14, s14
-; GFX1250-NEXT: s_lshl_b32 s11, s11, 8
-; GFX1250-NEXT: s_and_b32 s10, s10, 0xff
-; GFX1250-NEXT: s_lshl_b32 s9, s9, 8
-; GFX1250-NEXT: s_and_b32 s8, s8, 0xff
+; GFX1250-NEXT: s_add_co_i32 s11, s11, s11
+; GFX1250-NEXT: s_add_co_i32 s10, s10, s10
+; GFX1250-NEXT: s_and_b32 s2, s2, 0xffff
+; GFX1250-NEXT: s_lshl_b32 s8, s8, 16
; GFX1250-NEXT: s_add_co_i32 s1, s1, s1
; GFX1250-NEXT: s_add_co_i32 s13, s13, s13
; GFX1250-NEXT: s_lshl_b32 s7, s7, 8
@@ -2217,38 +2221,33 @@ define amdgpu_kernel void @amd_kernel_v16i8(<16 x i8> %arg0) {
; GFX1250-NEXT: s_add_co_i32 s5, s5, s5
; GFX1250-NEXT: s_add_co_i32 s4, s4, s4
; GFX1250-NEXT: s_and_b32 s3, s3, 0xff
-; GFX1250-NEXT: s_and_b32 s2, s2, 0xff
; GFX1250-NEXT: s_lshl_b32 s15, s15, 8
-; GFX1250-NEXT: s_or_b32 s10, s10, s11
-; GFX1250-NEXT: s_lshl_b32 s11, s14, 8
-; GFX1250-NEXT: s_or_b32 s8, s8, s9
+; GFX1250-NEXT: s_lshl_b32 s11, s11, 8
+; GFX1250-NEXT: s_and_b32 s10, s10, 0xff
+; GFX1250-NEXT: s_or_b32 s2, s2, s8
; GFX1250-NEXT: s_and_b32 s1, s1, 0xff
-; GFX1250-NEXT: s_lshl_b32 s9, s13, 8
+; GFX1250-NEXT: s_lshl_b32 s8, s13, 8
; GFX1250-NEXT: s_or_b32 s6, s6, s7
; GFX1250-NEXT: s_and_b32 s0, s0, 0xff
; GFX1250-NEXT: s_lshl_b32 s7, s12, 8
; GFX1250-NEXT: s_lshl_b32 s5, s5, 8
; GFX1250-NEXT: s_and_b32 s4, s4, 0xff
; GFX1250-NEXT: s_or_b32 s3, s3, s15
-; GFX1250-NEXT: s_or_b32 s2, s2, s11
-; GFX1250-NEXT: s_or_b32 s1, s1, s9
+; GFX1250-NEXT: s_or_b32 s10, s10, s11
+; GFX1250-NEXT: s_or_b32 s1, s1, s8
; GFX1250-NEXT: s_or_b32 s0, s0, s7
; GFX1250-NEXT: s_or_b32 s4, s4, s5
; GFX1250-NEXT: s_and_b32 s3, s3, 0xffff
; GFX1250-NEXT: s_lshl_b32 s10, s10, 16
-; GFX1250-NEXT: s_and_b32 s2, s2, 0xffff
-; GFX1250-NEXT: s_lshl_b32 s8, s8, 16
; GFX1250-NEXT: s_and_b32 s1, s1, 0xffff
+; GFX1250-NEXT: s_lshl_b32 s6, s6, 16
; GFX1250-NEXT: s_and_b32 s0, s0, 0xffff
; GFX1250-NEXT: s_lshl_b32 s4, s4, 16
-; GFX1250-NEXT: s_lshl_b32 s5, s6, 16
; GFX1250-NEXT: s_or_b32 s3, s3, s10
-; GFX1250-NEXT: s_or_b32 s2, s2, s8
+; GFX1250-NEXT: s_or_b32 s1, s1, s6
; GFX1250-NEXT: s_or_b32 s0, s0, s4
-; GFX1250-NEXT: s_or_b32 s1, s1, s5
-; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GFX1250-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX1250-NEXT: global_store_b128 v[4:5], v[0:3], off
; GFX1250-NEXT: s_endpgm
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index 4eaa1965c66f1..54b301c960df7 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -110,10 +110,8 @@ define amdgpu_kernel void @zero_init_kernel() {
;
; GFX942-LABEL: zero_init_kernel:
; GFX942: ; %bb.0:
-; GFX942-NEXT: s_mov_b32 s0, 0
-; GFX942-NEXT: s_mov_b32 s1, s0
-; GFX942-NEXT: s_mov_b32 s2, s0
-; GFX942-NEXT: s_mov_b32 s3, s0
+; GFX942-NEXT: s_mov_b64 s[0:1], 0
+; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:48
@@ -304,10 +302,8 @@ define void @zero_init_foo() {
; GFX942-LABEL: zero_init_foo:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: s_mov_b32 s0, 0
-; GFX942-NEXT: s_mov_b32 s1, s0
-; GFX942-NEXT: s_mov_b32 s2, s0
-; GFX942-NEXT: s_mov_b32 s3, s0
+; GFX942-NEXT: s_mov_b64 s[0:1], 0
+; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:48
@@ -1180,10 +1176,8 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
; GFX942: ; %bb.0:
; GFX942-NEXT: scratch_load_dword v0, off, off sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: s_mov_b32 s0, 0
-; GFX942-NEXT: s_mov_b32 s1, s0
-; GFX942-NEXT: s_mov_b32 s2, s0
-; GFX942-NEXT: s_mov_b32 s3, s0
+; GFX942-NEXT: s_mov_b64 s[0:1], 0
+; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
@@ -1397,10 +1391,8 @@ define void @zero_init_small_offset_foo() {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: scratch_load_dword v0, off, s32 sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: s_mov_b32 s0, 0
-; GFX942-NEXT: s_mov_b32 s1, s0
-; GFX942-NEXT: s_mov_b32 s2, s0
-; GFX942-NEXT: s_mov_b32 s3, s0
+; GFX942-NEXT: s_mov_b64 s[0:1], 0
+; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:256
@@ -2420,10 +2412,8 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
; GFX942: ; %bb.0:
; GFX942-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: s_mov_b32 s0, 0
-; GFX942-NEXT: s_mov_b32 s1, s0
-; GFX942-NEXT: s_mov_b32 s2, s0
-; GFX942-NEXT: s_mov_b32 s3, s0
+; GFX942-NEXT: s_mov_b64 s[0:1], 0
+; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX942-NEXT: s_movk_i32 s0, 0x4004
@@ -2656,10 +2646,8 @@ define void @zero_init_large_offset_foo() {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: scratch_load_dword v0, off, s32 offset:4 sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: s_mov_b32 s0, 0
-; GFX942-NEXT: s_mov_b32 s1, s0
-; GFX942-NEXT: s_mov_b32 s2, s0
-; GFX942-NEXT: s_mov_b32 s3, s0
+; GFX942-NEXT: s_mov_b64 s[0:1], 0
+; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX942-NEXT: s_add_i32 s0, s32, 0x4004
@@ -5021,10 +5009,8 @@ define amdgpu_ps void @large_offset() {
;
; GFX942-LABEL: large_offset:
; GFX942: ; %bb.0: ; %bb
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:3024 sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:3024 sc0 sc1
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index 069a47ec97bfe..20e7a15c829a9 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -4010,8 +4010,8 @@ define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call double @llvm.maximum.f64(double %a, double %b)
%max1 = call double @llvm.maximum.f64(double %max0, double %c)
diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
index d8746b58b16b7..8aebd6f719040 100644
--- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
@@ -4010,8 +4010,8 @@ define <2 x double> @v_no_fminimum3_f64__multi_use(double %a, double %b, double
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call double @llvm.minimum.f64(double %a, double %b)
%max1 = call double @llvm.minimum.f64(double %max0, double %c)
diff --git a/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll b/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll
index e532deaca98a8..8a5a50c5070e7 100644
--- a/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll
@@ -288,15 +288,15 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[50:51], v[2:3], v[2:3]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[48:49], v[0:1], v[0:1]
-; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[16:17], v[16:17], v[16:17]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[14:15], 0xc8, v[14:15]
+; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[16:17], v[16:17], v[16:17]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[24:25], 0x64, v[24:25]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[22:23], v[22:23], v[22:23]
+; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[20:21], v[20:21], v[20:21]
+; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[18:19], v[18:19], v[18:19]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[28:29], v[28:29], v[28:29]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[26:27], v[26:27], v[26:27]
; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[30:31], v[30:31], v[30:31]
-; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[20:21], v[20:21], v[20:21]
-; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[18:19], v[18:19], v[18:19]
; GCN-SDAG-NEXT: s_clause 0x1
; GCN-SDAG-NEXT: global_store_b128 v[52:53], v[34:37], off
; GCN-SDAG-NEXT: global_store_b128 v[54:55], v[0:3], off
diff --git a/llvm/test/CodeGen/AMDGPU/issue153808-extract-subvector-legalize.ll b/llvm/test/CodeGen/AMDGPU/issue153808-extract-subvector-legalize.ll
index 75c5d206e7933..a30afd000b6d4 100644
--- a/llvm/test/CodeGen/AMDGPU/issue153808-extract-subvector-legalize.ll
+++ b/llvm/test/CodeGen/AMDGPU/issue153808-extract-subvector-legalize.ll
@@ -82,10 +82,8 @@ define <3 x float> @extract_subvector_v3f32_v33f32_elt30_1(ptr addrspace(1) %ptr
; GFX942-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
; GFX942-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:112
; GFX942-NEXT: global_load_dword v2, v[0:1], off offset:128
-; GFX942-NEXT: s_mov_b32 s0, 0
-; GFX942-NEXT: s_mov_b32 s1, s0
-; GFX942-NEXT: s_mov_b32 s2, s0
-; GFX942-NEXT: s_mov_b32 s3, s0
+; GFX942-NEXT: s_mov_b64 s[0:1], 0
+; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(2)
; GFX942-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
; GFX942-NEXT: s_waitcnt vmcnt(2)
@@ -125,10 +123,8 @@ define <6 x float> @extract_subvector_v6f32_v36f32_elt30(ptr addrspace(1) %ptr)
; GFX942-NEXT: global_load_dwordx4 v[6:9], v[0:1], off
; GFX942-NEXT: global_load_dwordx4 v[10:13], v[0:1], off offset:112
; GFX942-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:128
-; GFX942-NEXT: s_mov_b32 s0, 0
-; GFX942-NEXT: s_mov_b32 s1, s0
-; GFX942-NEXT: s_mov_b32 s2, s0
-; GFX942-NEXT: s_mov_b32 s3, s0
+; GFX942-NEXT: s_mov_b64 s[0:1], 0
+; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(2)
; GFX942-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0
; GFX942-NEXT: s_waitcnt vmcnt(2)
@@ -165,10 +161,8 @@ define <3 x float> @issue153808_vector_extract_assert(ptr addrspace(1) %ptr) #0
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: global_load_dwordx4 v[6:9], v[0:1], off
; GFX942-NEXT: global_load_dwordx3 v[2:4], v[0:1], off offset:192
-; GFX942-NEXT: s_mov_b32 s0, 0
-; GFX942-NEXT: s_mov_b32 s1, s0
-; GFX942-NEXT: s_mov_b32 s2, s0
-; GFX942-NEXT: s_mov_b32 s3, s0
+; GFX942-NEXT: s_mov_b64 s[0:1], 0
+; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(1)
; GFX942-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0
; GFX942-NEXT: s_waitcnt vmcnt(1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
index 5ab8706f28f5f..7edb30d1fbd57 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
@@ -74,10 +74,9 @@ define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) #
; GFX942-LABEL: test_mfma_f32_32x32x4bf16_1k:
; GFX942: ; %bb.0: ; %bb
; GFX942-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
-; GFX942-NEXT: v_mov_b32_e32 v1, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, 1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, 2
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], 2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_load_dwordx16 s[16:31], s[34:35], 0x0
; GFX942-NEXT: s_load_dwordx16 s[0:15], s[34:35], 0x40
@@ -115,17 +114,17 @@ define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) #
; GFX942-NEXT: v_accvgpr_write_b32 a30, s14
; GFX942-NEXT: v_accvgpr_write_b32 a31, s15
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[2:3], v[0:1], a[0:31] cbsz:1 abid:2 blgp:3
+; GFX942-NEXT: v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[0:31] cbsz:1 abid:2 blgp:3
; GFX942-NEXT: s_nop 15
; GFX942-NEXT: s_nop 2
-; GFX942-NEXT: global_store_dwordx4 v1, a[24:27], s[34:35] offset:96
-; GFX942-NEXT: global_store_dwordx4 v1, a[28:31], s[34:35] offset:112
-; GFX942-NEXT: global_store_dwordx4 v1, a[16:19], s[34:35] offset:64
-; GFX942-NEXT: global_store_dwordx4 v1, a[20:23], s[34:35] offset:80
-; GFX942-NEXT: global_store_dwordx4 v1, a[8:11], s[34:35] offset:32
-; GFX942-NEXT: global_store_dwordx4 v1, a[12:15], s[34:35] offset:48
-; GFX942-NEXT: global_store_dwordx4 v1, a[0:3], s[34:35]
-; GFX942-NEXT: global_store_dwordx4 v1, a[4:7], s[34:35] offset:16
+; GFX942-NEXT: global_store_dwordx4 v4, a[24:27], s[34:35] offset:96
+; GFX942-NEXT: global_store_dwordx4 v4, a[28:31], s[34:35] offset:112
+; GFX942-NEXT: global_store_dwordx4 v4, a[16:19], s[34:35] offset:64
+; GFX942-NEXT: global_store_dwordx4 v4, a[20:23], s[34:35] offset:80
+; GFX942-NEXT: global_store_dwordx4 v4, a[8:11], s[34:35] offset:32
+; GFX942-NEXT: global_store_dwordx4 v4, a[12:15], s[34:35] offset:48
+; GFX942-NEXT: global_store_dwordx4 v4, a[0:3], s[34:35]
+; GFX942-NEXT: global_store_dwordx4 v4, a[4:7], s[34:35] offset:16
; GFX942-NEXT: s_endpgm
;
; GFX90A-VGPR-LABEL: test_mfma_f32_32x32x4bf16_1k:
@@ -188,10 +187,9 @@ define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) #
; GFX942-VGPR-LABEL: test_mfma_f32_32x32x4bf16_1k:
; GFX942-VGPR: ; %bb.0: ; %bb
; GFX942-VGPR-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v33, 0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v34, 1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v35, v33
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v32, 2
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[32:33], 1
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[34:35], 2
+; GFX942-VGPR-NEXT: v_mov_b32_e32 v36, 0
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-VGPR-NEXT: s_load_dwordx16 s[16:31], s[34:35], 0x0
; GFX942-VGPR-NEXT: s_load_dwordx16 s[0:15], s[34:35], 0x40
@@ -229,17 +227,17 @@ define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) #
; GFX942-VGPR-NEXT: v_mov_b32_e32 v30, s14
; GFX942-VGPR-NEXT: v_mov_b32_e32 v31, s15
; GFX942-VGPR-NEXT: s_nop 1
-; GFX942-VGPR-NEXT: v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[34:35], v[32:33], v[0:31] cbsz:1 abid:2 blgp:3
+; GFX942-VGPR-NEXT: v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[32:33], v[34:35], v[0:31] cbsz:1 abid:2 blgp:3
; GFX942-VGPR-NEXT: s_nop 15
; GFX942-VGPR-NEXT: s_nop 2
-; GFX942-VGPR-NEXT: global_store_dwordx4 v33, v[24:27], s[34:35] offset:96
-; GFX942-VGPR-NEXT: global_store_dwordx4 v33, v[28:31], s[34:35] offset:112
-; GFX942-VGPR-NEXT: global_store_dwordx4 v33, v[16:19], s[34:35] offset:64
-; GFX942-VGPR-NEXT: global_store_dwordx4 v33, v[20:23], s[34:35] offset:80
-; GFX942-VGPR-NEXT: global_store_dwordx4 v33, v[8:11], s[34:35] offset:32
-; GFX942-VGPR-NEXT: global_store_dwordx4 v33, v[12:15], s[34:35] offset:48
-; GFX942-VGPR-NEXT: global_store_dwordx4 v33, v[0:3], s[34:35]
-; GFX942-VGPR-NEXT: global_store_dwordx4 v33, v[4:7], s[34:35] offset:16
+; GFX942-VGPR-NEXT: global_store_dwordx4 v36, v[24:27], s[34:35] offset:96
+; GFX942-VGPR-NEXT: global_store_dwordx4 v36, v[28:31], s[34:35] offset:112
+; GFX942-VGPR-NEXT: global_store_dwordx4 v36, v[16:19], s[34:35] offset:64
+; GFX942-VGPR-NEXT: global_store_dwordx4 v36, v[20:23], s[34:35] offset:80
+; GFX942-VGPR-NEXT: global_store_dwordx4 v36, v[8:11], s[34:35] offset:32
+; GFX942-VGPR-NEXT: global_store_dwordx4 v36, v[12:15], s[34:35] offset:48
+; GFX942-VGPR-NEXT: global_store_dwordx4 v36, v[0:3], s[34:35]
+; GFX942-VGPR-NEXT: global_store_dwordx4 v36, v[4:7], s[34:35] offset:16
; GFX942-VGPR-NEXT: s_endpgm
bb:
%in.1 = load <32 x float>, ptr addrspace(1) %arg
@@ -289,10 +287,8 @@ define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) #
; GFX942-LABEL: test_mfma_f32_16x16x4bf16_1k:
; GFX942: ; %bb.0: ; %bb
; GFX942-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24
-; GFX942-NEXT: v_mov_b32_e32 v1, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, 1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, 2
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], 2
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
@@ -313,12 +309,13 @@ define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) #
; GFX942-NEXT: v_accvgpr_write_b32 a14, s14
; GFX942-NEXT: v_accvgpr_write_b32 a15, s15
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[0:1], a[0:15] cbsz:1 abid:2 blgp:3
-; GFX942-NEXT: s_nop 10
-; GFX942-NEXT: global_store_dwordx4 v1, a[12:15], s[16:17] offset:48
-; GFX942-NEXT: global_store_dwordx4 v1, a[8:11], s[16:17] offset:32
-; GFX942-NEXT: global_store_dwordx4 v1, a[4:7], s[16:17] offset:16
-; GFX942-NEXT: global_store_dwordx4 v1, a[0:3], s[16:17]
+; GFX942-NEXT: v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: s_nop 9
+; GFX942-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48
+; GFX942-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32
+; GFX942-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17]
; GFX942-NEXT: s_endpgm
;
; GFX90A-VGPR-LABEL: test_mfma_f32_16x16x4bf16_1k:
@@ -351,10 +348,8 @@ define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) #
; GFX942-VGPR-LABEL: test_mfma_f32_16x16x4bf16_1k:
; GFX942-VGPR: ; %bb.0: ; %bb
; GFX942-VGPR-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v17, 0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v18, 1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v19, v17
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 2
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[16:17], 1
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[18:19], 2
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-VGPR-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
@@ -367,12 +362,13 @@ define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) #
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[12:13]
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
; GFX942-VGPR-NEXT: s_nop 1
-; GFX942-VGPR-NEXT: v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[18:19], v[16:17], v[0:15] cbsz:1 abid:2 blgp:3
-; GFX942-VGPR-NEXT: s_nop 10
-; GFX942-VGPR-NEXT: global_store_dwordx4 v17, v[12:15], s[16:17] offset:48
-; GFX942-VGPR-NEXT: global_store_dwordx4 v17, v[8:11], s[16:17] offset:32
-; GFX942-VGPR-NEXT: global_store_dwordx4 v17, v[4:7], s[16:17] offset:16
-; GFX942-VGPR-NEXT: global_store_dwordx4 v17, v[0:3], s[16:17]
+; GFX942-VGPR-NEXT: v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[16:17], v[18:19], v[0:15] cbsz:1 abid:2 blgp:3
+; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-VGPR-NEXT: s_nop 9
+; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:48
+; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:32
+; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
+; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
; GFX942-VGPR-NEXT: s_endpgm
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
@@ -407,10 +403,9 @@ define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(ptr addrspace(1) %arg) #0
; GFX942-LABEL: test_mfma_f32_4x4x4bf16_1k:
; GFX942: ; %bb.0: ; %bb
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
-; GFX942-NEXT: v_mov_b32_e32 v1, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, 1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, 2
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], 2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
@@ -419,9 +414,9 @@ define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(ptr addrspace(1) %arg) #0
; GFX942-NEXT: v_accvgpr_write_b32 a2, s2
; GFX942-NEXT: v_accvgpr_write_b32 a3, s3
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[2:3], v[0:1], a[0:3] cbsz:1 abid:2 blgp:3
+; GFX942-NEXT: v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3
; GFX942-NEXT: s_nop 4
-; GFX942-NEXT: global_store_dwordx4 v1, a[0:3], s[6:7]
+; GFX942-NEXT: global_store_dwordx4 v4, a[0:3], s[6:7]
; GFX942-NEXT: s_endpgm
;
; GFX90A-VGPR-LABEL: test_mfma_f32_4x4x4bf16_1k:
@@ -445,19 +440,18 @@ define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(ptr addrspace(1) %arg) #0
; GFX942-VGPR-LABEL: test_mfma_f32_4x4x4bf16_1k:
; GFX942-VGPR: ; %bb.0: ; %bb
; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, 1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, 2
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], 1
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], 2
+; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX942-VGPR-NEXT: s_nop 1
-; GFX942-VGPR-NEXT: v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[6:7], v[4:5], v[0:3] cbsz:1 abid:2 blgp:3
+; GFX942-VGPR-NEXT: v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[4:5], v[6:7], v[0:3] cbsz:1 abid:2 blgp:3
; GFX942-VGPR-NEXT: s_nop 4
-; GFX942-VGPR-NEXT: global_store_dwordx4 v5, v[0:3], s[6:7]
+; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[6:7]
; GFX942-VGPR-NEXT: s_endpgm
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
@@ -508,10 +502,8 @@ define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) #
; GFX942-LABEL: test_mfma_f32_32x32x8bf16_1k:
; GFX942: ; %bb.0: ; %bb
; GFX942-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24
-; GFX942-NEXT: v_mov_b32_e32 v1, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, 1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, 2
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], 2
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
@@ -532,12 +524,13 @@ define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) #
; GFX942-NEXT: v_accvgpr_write_b32 a14, s14
; GFX942-NEXT: v_accvgpr_write_b32 a15, s15
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_32x32x8_bf16 a[0:15], v[2:3], v[0:1], a[0:15] cbsz:1 abid:2 blgp:3
-; GFX942-NEXT: s_nop 10
-; GFX942-NEXT: global_store_dwordx4 v1, a[12:15], s[16:17] offset:48
-; GFX942-NEXT: global_store_dwordx4 v1, a[8:11], s[16:17] offset:32
-; GFX942-NEXT: global_store_dwordx4 v1, a[4:7], s[16:17] offset:16
-; GFX942-NEXT: global_store_dwordx4 v1, a[0:3], s[16:17]
+; GFX942-NEXT: v_mfma_f32_32x32x8_bf16 a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: s_nop 9
+; GFX942-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48
+; GFX942-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32
+; GFX942-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17]
; GFX942-NEXT: s_endpgm
;
; GFX90A-VGPR-LABEL: test_mfma_f32_32x32x8bf16_1k:
@@ -571,10 +564,8 @@ define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) #
; GFX942-VGPR-LABEL: test_mfma_f32_32x32x8bf16_1k:
; GFX942-VGPR: ; %bb.0: ; %bb
; GFX942-VGPR-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v17, 0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v18, 1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v19, v17
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 2
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[16:17], 1
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[18:19], 2
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-VGPR-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
@@ -587,12 +578,13 @@ define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) #
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[12:13]
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
; GFX942-VGPR-NEXT: s_nop 1
-; GFX942-VGPR-NEXT: v_mfma_f32_32x32x8_bf16 v[0:15], v[18:19], v[16:17], v[0:15] cbsz:1 abid:2 blgp:3
-; GFX942-VGPR-NEXT: s_nop 10
-; GFX942-VGPR-NEXT: global_store_dwordx4 v17, v[12:15], s[16:17] offset:48
-; GFX942-VGPR-NEXT: global_store_dwordx4 v17, v[8:11], s[16:17] offset:32
-; GFX942-VGPR-NEXT: global_store_dwordx4 v17, v[4:7], s[16:17] offset:16
-; GFX942-VGPR-NEXT: global_store_dwordx4 v17, v[0:3], s[16:17]
+; GFX942-VGPR-NEXT: v_mfma_f32_32x32x8_bf16 v[0:15], v[16:17], v[18:19], v[0:15] cbsz:1 abid:2 blgp:3
+; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-VGPR-NEXT: s_nop 9
+; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:48
+; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:32
+; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
+; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
; GFX942-VGPR-NEXT: s_endpgm
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
@@ -627,10 +619,9 @@ define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(ptr addrspace(1) %arg)
; GFX942-LABEL: test_mfma_f32_16x16x16bf16_1k:
; GFX942: ; %bb.0: ; %bb
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
-; GFX942-NEXT: v_mov_b32_e32 v1, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, 1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, 2
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], 2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
@@ -639,9 +630,9 @@ define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(ptr addrspace(1) %arg)
; GFX942-NEXT: v_accvgpr_write_b32 a2, s2
; GFX942-NEXT: v_accvgpr_write_b32 a3, s3
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mfma_f32_16x16x16_bf16 a[0:3], v[2:3], v[0:1], a[0:3] cbsz:1 abid:2 blgp:3
+; GFX942-NEXT: v_mfma_f32_16x16x16_bf16 a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3
; GFX942-NEXT: s_nop 6
-; GFX942-NEXT: global_store_dwordx4 v1, a[0:3], s[6:7]
+; GFX942-NEXT: global_store_dwordx4 v4, a[0:3], s[6:7]
; GFX942-NEXT: s_endpgm
;
; GFX90A-VGPR-LABEL: test_mfma_f32_16x16x16bf16_1k:
@@ -665,19 +656,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(ptr addrspace(1) %arg)
; GFX942-VGPR-LABEL: test_mfma_f32_16x16x16bf16_1k:
; GFX942-VGPR: ; %bb.0: ; %bb
; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, 1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, 2
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], 1
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], 2
+; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX942-VGPR-NEXT: s_nop 1
-; GFX942-VGPR-NEXT: v_mfma_f32_16x16x16_bf16 v[0:3], v[6:7], v[4:5], v[0:3] cbsz:1 abid:2 blgp:3
+; GFX942-VGPR-NEXT: v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3] cbsz:1 abid:2 blgp:3
; GFX942-VGPR-NEXT: s_nop 6
-; GFX942-VGPR-NEXT: global_store_dwordx4 v5, v[0:3], s[6:7]
+; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[6:7]
; GFX942-VGPR-NEXT: s_endpgm
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
@@ -1273,14 +1263,14 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_bit
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX942-NEXT: v_accvgpr_write_b32 a0, 0
; GFX942-NEXT: v_accvgpr_write_b32 a1, 64
-; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0
+; GFX942-NEXT: v_accvgpr_mov_b32 a3, a1
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
-; GFX942-NEXT: v_accvgpr_mov_b32 a3, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a4, a0
; GFX942-NEXT: v_accvgpr_mov_b32 a5, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a6, a0
; GFX942-NEXT: v_accvgpr_mov_b32 a7, a1
+; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0
+; GFX942-NEXT: v_accvgpr_mov_b32 a4, a0
+; GFX942-NEXT: v_accvgpr_mov_b32 a6, a0
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7]
@@ -1326,26 +1316,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_bit
; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 0
; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 64
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
+; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[2:3]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7]
; GFX942-VGPR-NEXT: s_nop 1
-; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9]
-; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 neg:[1,1,0]
+; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7]
+; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
+; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0
; GFX942-VGPR-NEXT: s_nop 15
-; GFX942-VGPR-NEXT: s_nop 1
-; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
-; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1]
+; GFX942-VGPR-NEXT: s_nop 0
+; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-VGPR-NEXT: s_endpgm
bb:
%mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> splat (double bitcast (i64 274877906944 to double)), i32 0, i32 0, i32 0)
@@ -1386,14 +1370,14 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_and
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX942-NEXT: v_accvgpr_write_b32 a0, 64
; GFX942-NEXT: v_accvgpr_mov_b32 a1, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0
+; GFX942-NEXT: v_accvgpr_mov_b32 a3, a1
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
-; GFX942-NEXT: v_accvgpr_mov_b32 a3, a0
+; GFX942-NEXT: v_accvgpr_mov_b32 a5, a1
+; GFX942-NEXT: v_accvgpr_mov_b32 a7, a1
+; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0
; GFX942-NEXT: v_accvgpr_mov_b32 a4, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a5, a0
; GFX942-NEXT: v_accvgpr_mov_b32 a6, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a7, a0
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7]
@@ -1433,18 +1417,16 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_and
; GFX942-VGPR-LABEL: test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_and_low:
; GFX942-VGPR: ; %bb.0: ; %bb
; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 64
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-VGPR-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34
+; GFX942-VGPR-NEXT: s_mov_b32 s6, 64
+; GFX942-VGPR-NEXT: s_mov_b32 s7, s6
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[2:3]
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], s[6:7]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], s[6:7]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[8:9]
; GFX942-VGPR-NEXT: s_nop 1
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7]
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
@@ -1493,14 +1475,14 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_f32_1_in_high_and_
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX942-NEXT: v_accvgpr_write_b32 a0, 1.0
; GFX942-NEXT: v_accvgpr_mov_b32 a1, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0
+; GFX942-NEXT: v_accvgpr_mov_b32 a3, a1
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
-; GFX942-NEXT: v_accvgpr_mov_b32 a3, a0
+; GFX942-NEXT: v_accvgpr_mov_b32 a5, a1
+; GFX942-NEXT: v_accvgpr_mov_b32 a7, a1
+; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0
; GFX942-NEXT: v_accvgpr_mov_b32 a4, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a5, a0
; GFX942-NEXT: v_accvgpr_mov_b32 a6, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a7, a0
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7]
@@ -1540,18 +1522,16 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_f32_1_in_high_and_
; GFX942-VGPR-LABEL: test_mfma_f64_16x16x4f64_splat_imm_f32_1_in_high_and_low:
; GFX942-VGPR: ; %bb.0: ; %bb
; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-VGPR-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34
+; GFX942-VGPR-NEXT: s_mov_b32 s6, 1.0
+; GFX942-VGPR-NEXT: s_mov_b32 s7, s6
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[2:3]
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], s[6:7]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], s[6:7]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[8:9]
; GFX942-VGPR-NEXT: s_nop 1
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7]
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
@@ -1600,17 +1580,17 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX942-NEXT: v_accvgpr_write_b32 a0, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, 0x3ff00000
-; GFX942-NEXT: v_accvgpr_write_b32 a7, v0
+; GFX942-NEXT: v_accvgpr_write_b32 a1, 0
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 1.0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v2, s2
; GFX942-NEXT: v_mov_b32_e32 v3, s3
-; GFX942-NEXT: v_accvgpr_mov_b32 a1, a0
+; GFX942-NEXT: v_accvgpr_write_b32 a7, v1
+; GFX942-NEXT: v_accvgpr_mov_b32 a3, a1
+; GFX942-NEXT: v_accvgpr_mov_b32 a5, a1
+; GFX942-NEXT: v_accvgpr_write_b32 a6, v0
; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a3, a0
; GFX942-NEXT: v_accvgpr_mov_b32 a4, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a5, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a6, a0
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[2:3], v[0:1], a[0:7]
@@ -1653,28 +1633,21 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
; GFX942-VGPR: ; %bb.0: ; %bb
; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, 0x3ff00000
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v12, s2
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v13, s3
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], 0
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], 1.0
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
+; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-VGPR-NEXT: v_mov_b32_e32 v10, s2
+; GFX942-VGPR-NEXT: v_mov_b32_e32 v11, s3
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[6:7]
; GFX942-VGPR-NEXT: s_nop 1
-; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[12:13], v[10:11], v[2:9]
+; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[10:11], v[8:9], v[0:7]
+; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0
; GFX942-VGPR-NEXT: s_nop 15
-; GFX942-VGPR-NEXT: s_nop 1
-; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
-; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1]
+; GFX942-VGPR-NEXT: s_nop 0
+; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-VGPR-NEXT: s_endpgm
bb:
%mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> <double 0.0, double 0.0, double 0.0, double 1.0>, i32 0, i32 0, i32 0)
@@ -1711,20 +1684,27 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %
;
; GFX942-LABEL: test_mfma_f64_16x16x4f64_splat_lit:
; GFX942: ; %bb.0: ; %bb
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0x405ec000
+; GFX942-NEXT: v_accvgpr_write_b32 a0, v0
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
-; GFX942-NEXT: v_mov_b32_e32 v0, 0x405ec000
-; GFX942-NEXT: v_accvgpr_write_b32 a0, 0
-; GFX942-NEXT: v_accvgpr_write_b32 a1, v0
+; GFX942-NEXT: v_accvgpr_write_b32 a1, v1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0x405ec000
+; GFX942-NEXT: v_accvgpr_write_b32 a3, v1
+; GFX942-NEXT: v_accvgpr_write_b32 a2, v0
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0x405ec000
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v2, s2
; GFX942-NEXT: v_mov_b32_e32 v3, s3
-; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a3, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a4, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a5, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a6, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a7, a1
+; GFX942-NEXT: v_accvgpr_write_b32 a5, v1
+; GFX942-NEXT: v_accvgpr_write_b32 a4, v0
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0x405ec000
+; GFX942-NEXT: v_accvgpr_write_b32 a7, v1
+; GFX942-NEXT: v_accvgpr_write_b32 a6, v0
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[2:3], v[0:1], a[0:7]
@@ -1769,26 +1749,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %
; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 0
; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0x405ec000
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v12, s2
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v13, s3
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
+; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-VGPR-NEXT: v_mov_b32_e32 v10, s2
+; GFX942-VGPR-NEXT: v_mov_b32_e32 v11, s3
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[6:7]
; GFX942-VGPR-NEXT: s_nop 1
-; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[12:13], v[10:11], v[2:9]
+; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[10:11], v[8:9], v[0:7]
+; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0
; GFX942-VGPR-NEXT: s_nop 15
-; GFX942-VGPR-NEXT: s_nop 1
-; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
-; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1]
+; GFX942-VGPR-NEXT: s_nop 0
+; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-VGPR-NEXT: s_endpgm
bb:
%mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> <double 123.0, double 123.0, double 123.0, double 123.0>, i32 0, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
index 7e30af96bb8b9..3cd009126666c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
@@ -5489,38 +5489,38 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm(ptr addrspace(1) %arg) #
; GFX942-LABEL: test_mfma_f32_32x32x1f32_imm:
; GFX942: ; %bb.0: ; %bb
; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
-; GFX942-NEXT: v_accvgpr_write_b32 a1, 0
+; GFX942-NEXT: v_accvgpr_write_b32 a2, 0
+; GFX942-NEXT: v_accvgpr_write_b32 a3, 0
; GFX942-NEXT: v_accvgpr_write_b32 a0, 1.0
-; GFX942-NEXT: v_accvgpr_mov_b32 a2, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a3, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a4, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a5, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a6, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a7, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a8, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a9, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a10, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a11, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a12, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a13, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a14, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a15, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a16, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a17, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a18, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a19, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a20, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a21, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a22, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a23, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a24, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a25, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a26, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a27, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a28, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a29, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a30, a1
-; GFX942-NEXT: v_accvgpr_mov_b32 a31, a1
+; GFX942-NEXT: v_accvgpr_mov_b32 a5, a3
+; GFX942-NEXT: v_accvgpr_mov_b32 a7, a3
+; GFX942-NEXT: v_accvgpr_mov_b32 a9, a3
+; GFX942-NEXT: v_accvgpr_mov_b32 a11, a3
+; GFX942-NEXT: v_accvgpr_mov_b32 a13, a3
+; GFX942-NEXT: v_accvgpr_mov_b32 a15, a3
+; GFX942-NEXT: v_accvgpr_mov_b32 a17, a3
+; GFX942-NEXT: v_accvgpr_mov_b32 a19, a3
+; GFX942-NEXT: v_accvgpr_mov_b32 a21, a3
+; GFX942-NEXT: v_accvgpr_mov_b32 a23, a3
+; GFX942-NEXT: v_accvgpr_mov_b32 a25, a3
+; GFX942-NEXT: v_accvgpr_mov_b32 a27, a3
+; GFX942-NEXT: v_accvgpr_mov_b32 a29, a3
+; GFX942-NEXT: v_accvgpr_mov_b32 a31, a3
+; GFX942-NEXT: v_accvgpr_write_b32 a1, 0
+; GFX942-NEXT: v_accvgpr_mov_b32 a4, a2
+; GFX942-NEXT: v_accvgpr_mov_b32 a6, a2
+; GFX942-NEXT: v_accvgpr_mov_b32 a8, a2
+; GFX942-NEXT: v_accvgpr_mov_b32 a10, a2
+; GFX942-NEXT: v_accvgpr_mov_b32 a12, a2
+; GFX942-NEXT: v_accvgpr_mov_b32 a14, a2
+; GFX942-NEXT: v_accvgpr_mov_b32 a16, a2
+; GFX942-NEXT: v_accvgpr_mov_b32 a18, a2
+; GFX942-NEXT: v_accvgpr_mov_b32 a20, a2
+; GFX942-NEXT: v_accvgpr_mov_b32 a22, a2
+; GFX942-NEXT: v_accvgpr_mov_b32 a24, a2
+; GFX942-NEXT: v_accvgpr_mov_b32 a26, a2
+; GFX942-NEXT: v_accvgpr_mov_b32 a28, a2
+; GFX942-NEXT: v_accvgpr_mov_b32 a30, a2
; GFX942-NEXT: v_mov_b32_e32 v2, 2.0
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX942-NEXT: v_mov_b32_e32 v0, 0
@@ -5540,69 +5540,38 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm(ptr addrspace(1) %arg) #
;
; GFX942-VGPR-LABEL: test_mfma_f32_32x32x1f32_imm:
; GFX942-VGPR: ; %bb.0: ; %bb
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v10, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v12, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v13, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v14, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v15, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v17, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v18, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v19, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v20, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v21, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v22, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v23, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v24, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v25, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v26, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v27, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v28, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v29, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v30, v1
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v31, v1
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[32:33], v[30:31]
-; GFX942-VGPR-NEXT: v_mov_b32_e32 v34, 2.0
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[30:31], v[28:29]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[28:29], v[26:27]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[26:27], v[24:25]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[24:25], v[22:23]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[22:23], v[20:21]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[20:21], v[18:19]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[18:19], v[16:17]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[16:17], v[14:15]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[14:15], v[12:13]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], v[10:11]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], v[8:9]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
+; GFX942-VGPR-NEXT: v_mov_b32_e32 v33, 1.0
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], 0
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], 0x3f800000
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
-; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[2:3]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[2:3]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], v[2:3]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], v[2:3]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[14:15], v[2:3]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[16:17], v[2:3]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[18:19], v[2:3]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[20:21], v[2:3]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[22:23], v[2:3]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[24:25], v[2:3]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[26:27], v[2:3]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[28:29], v[2:3]
+; GFX942-VGPR-NEXT: v_mov_b64_e32 v[30:31], v[2:3]
+; GFX942-VGPR-NEXT: v_mov_b32_e32 v34, 2.0
; GFX942-VGPR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX942-VGPR-NEXT: s_nop 0
-; GFX942-VGPR-NEXT: v_mfma_f32_32x32x1_2b_f32 v[2:33], v0, v34, v[2:33]
+; GFX942-VGPR-NEXT: v_mov_b32_e32 v32, 0
+; GFX942-VGPR-NEXT: v_mfma_f32_32x32x1_2b_f32 v[0:31], v33, v34, v[0:31]
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-VGPR-NEXT: s_nop 15
; GFX942-VGPR-NEXT: s_nop 0
-; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[30:33], s[0:1] offset:112
-; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[26:29], s[0:1] offset:96
-; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[22:25], s[0:1] offset:80
-; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[18:21], s[0:1] offset:64
-; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[14:17], s[0:1] offset:48
-; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[10:13], s[0:1] offset:32
-; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[6:9], s[0:1] offset:16
-; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[2:5], s[0:1]
+; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] offset:112
+; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] offset:96
+; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] offset:80
+; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] offset:64
+; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1] offset:48
+; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[8:11], s[0:1] offset:32
+; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[4:7], s[0:1] offset:16
+; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[0:3], s[0:1]
; GFX942-VGPR-NEXT: s_endpgm
bb:
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> <float 1.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, i32 0, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
index f971080e02c5b..090707eda3ca5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
@@ -561,17 +561,17 @@ define <2 x double> @v_maximum_v2f64(<2 x double> %src0, <2 x double> %src1) {
; GFX950-LABEL: v_maximum_v2f64:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX950-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[6:7]
+; GFX950-NEXT: v_mov_b32_e32 v10, 0x7ff80000
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT: v_max_f64 v[6:7], v[0:1], v[4:5]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v10, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64:
@@ -630,12 +630,19 @@ define <2 x double> @v_maximum_v2f64__nnan(<2 x double> %src0, <2 x double> %src
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v2f64__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f64__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f64__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64__nnan:
; GFX10: ; %bb.0:
@@ -711,17 +718,17 @@ define <2 x double> @v_maximum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
; GFX950-LABEL: v_maximum_v2f64__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX950-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[6:7]
+; GFX950-NEXT: v_mov_b32_e32 v10, 0x7ff80000
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT: v_max_f64 v[6:7], v[0:1], v[4:5]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v10, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64__nsz:
@@ -780,12 +787,19 @@ define <2 x double> @v_maximum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double>
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v2f64__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f64__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f64__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64__nnan_nsz:
; GFX10: ; %bb.0:
@@ -1008,22 +1022,22 @@ define <3 x double> @v_maximum_v3f64(<3 x double> %src0, <3 x double> %src1) {
; GFX950-LABEL: v_maximum_v3f64:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX950-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT: v_max_f64 v[12:13], v[4:5], v[10:11]
+; GFX950-NEXT: v_mov_b32_e32 v14, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT: v_max_f64 v[10:11], v[2:3], v[8:9]
; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v12, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[6:7]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v11, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v10, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX950-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64:
@@ -1092,13 +1106,21 @@ define <3 x double> @v_maximum_v3f64__nnan(<3 x double> %src0, <3 x double> %src
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v3f64__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f64__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f64__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64__nnan:
; GFX10: ; %bb.0:
@@ -1189,22 +1211,22 @@ define <3 x double> @v_maximum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
; GFX950-LABEL: v_maximum_v3f64__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX950-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT: v_max_f64 v[12:13], v[4:5], v[10:11]
+; GFX950-NEXT: v_mov_b32_e32 v14, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT: v_max_f64 v[10:11], v[2:3], v[8:9]
; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v12, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[6:7]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v11, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v10, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX950-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64__nsz:
@@ -1273,13 +1295,21 @@ define <3 x double> @v_maximum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double>
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v3f64__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f64__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f64__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64__nnan_nsz:
; GFX10: ; %bb.0:
@@ -1382,27 +1412,27 @@ define <4 x double> @v_maximum_v4f64(<4 x double> %src0, <4 x double> %src1) {
; GFX950-LABEL: v_maximum_v4f64:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT: v_max_f64 v[16:17], v[6:7], v[14:15]
+; GFX950-NEXT: v_mov_b32_e32 v18, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT: v_max_f64 v[14:15], v[4:5], v[12:13]
; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX950-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v16, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT: v_max_f64 v[12:13], v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v15, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v14, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT: v_max_f64 v[10:11], v[0:1], v[8:9]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v13, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v12, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX950-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v11, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64:
@@ -1482,14 +1512,23 @@ define <4 x double> @v_maximum_v4f64__nnan(<4 x double> %src0, <4 x double> %src
; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v4f64__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
-; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f64__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f64__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64__nnan:
; GFX10: ; %bb.0:
@@ -1595,27 +1634,27 @@ define <4 x double> @v_maximum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
; GFX950-LABEL: v_maximum_v4f64__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT: v_max_f64 v[16:17], v[6:7], v[14:15]
+; GFX950-NEXT: v_mov_b32_e32 v18, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT: v_max_f64 v[14:15], v[4:5], v[12:13]
; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX950-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v16, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT: v_max_f64 v[12:13], v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v15, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v14, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT: v_max_f64 v[10:11], v[0:1], v[8:9]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v13, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v12, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX950-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v11, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64__nsz:
@@ -1695,14 +1734,23 @@ define <4 x double> @v_maximum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double>
; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v4f64__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
-; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f64__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f64__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64__nnan_nsz:
; GFX10: ; %bb.0:
@@ -1864,43 +1912,43 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) {
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: scratch_load_dword v31, off, s32
; GFX950-NEXT: v_mov_b32_e32 v54, 0x7ff80000
-; GFX950-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX950-NEXT: v_max_f64 v[34:35], v[2:3], v[18:19]
-; GFX950-NEXT: v_max_f64 v[36:37], v[4:5], v[20:21]
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX950-NEXT: v_max_f64 v[32:33], v[12:13], v[28:29]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX950-NEXT: v_max_f64 v[34:35], v[10:11], v[26:27]
+; GFX950-NEXT: v_max_f64 v[36:37], v[8:9], v[24:25]
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v33, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v12, v32, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
; GFX950-NEXT: v_max_f64 v[38:39], v[6:7], v[22:23]
-; GFX950-NEXT: v_max_f64 v[48:49], v[8:9], v[24:25]
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
-; GFX950-NEXT: v_max_f64 v[50:51], v[10:11], v[26:27]
-; GFX950-NEXT: v_max_f64 v[52:53], v[12:13], v[28:29]
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
-; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_max_f64 v[16:17], v[14:15], v[30:31]
-; GFX950-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
+; GFX950-NEXT: v_max_f64 v[48:49], v[4:5], v[20:21]
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v35, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v10, v34, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
+; GFX950-NEXT: v_max_f64 v[50:51], v[2:3], v[18:19]
+; GFX950-NEXT: v_max_f64 v[52:53], v[0:1], v[16:17]
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v37, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v8, v36, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v49, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v48, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v51, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v50, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_max_f64 v[16:17], v[14:15], v[30:31]
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v53, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v52, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
; GFX950-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v8f64:
@@ -2371,152 +2419,144 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX950-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse
-; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:8
-; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:4
-; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:16
-; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:12
-; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:24
-; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:20
+; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:120
+; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:116
; GFX950-NEXT: scratch_load_dword v57, off, s32 offset:32
; GFX950-NEXT: scratch_load_dword v56, off, s32 offset:28
-; GFX950-NEXT: scratch_load_dword v47, off, s32 offset:40
-; GFX950-NEXT: scratch_load_dword v46, off, s32 offset:36
-; GFX950-NEXT: scratch_load_dword v45, off, s32 offset:48
-; GFX950-NEXT: scratch_load_dword v44, off, s32 offset:44
-; GFX950-NEXT: scratch_load_dword v43, off, s32 offset:56
-; GFX950-NEXT: scratch_load_dword v42, off, s32 offset:52
-; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:64
-; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:60
-; GFX950-NEXT: scratch_load_dword v55, off, s32 offset:72
-; GFX950-NEXT: scratch_load_dword v54, off, s32 offset:68
-; GFX950-NEXT: scratch_load_dword v53, off, s32 offset:80
-; GFX950-NEXT: scratch_load_dword v52, off, s32 offset:76
-; GFX950-NEXT: scratch_load_dword v51, off, s32 offset:88
-; GFX950-NEXT: scratch_load_dword v50, off, s32 offset:84
-; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:96
-; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:92
+; GFX950-NEXT: scratch_load_dword v45, off, s32 offset:24
+; GFX950-NEXT: scratch_load_dword v44, off, s32 offset:20
+; GFX950-NEXT: scratch_load_dword v43, off, s32 offset:16
+; GFX950-NEXT: scratch_load_dword v42, off, s32 offset:12
+; GFX950-NEXT: scratch_load_dword v53, off, s32 offset:8
+; GFX950-NEXT: scratch_load_dword v52, off, s32 offset:4
+; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:112
; GFX950-NEXT: scratch_load_dword v31, off, s32
-; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:104
-; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:100
+; GFX950-NEXT: scratch_load_dword v55, off, s32 offset:128
+; GFX950-NEXT: scratch_load_dword v54, off, s32 offset:124
+; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:108
+; GFX950-NEXT: scratch_load_dword v51, off, s32 offset:104
+; GFX950-NEXT: scratch_load_dword v50, off, s32 offset:100
+; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:96
+; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:92
+; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:88
+; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:84
+; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:80
+; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:76
+; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:72
+; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:68
+; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse
-; GFX950-NEXT: s_waitcnt vmcnt(25)
-; GFX950-NEXT: v_max_f64 v[58:59], v[0:1], v[32:33]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[32:33]
-; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:112
-; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:108
-; GFX950-NEXT: s_waitcnt vmcnt(25)
-; GFX950-NEXT: v_max_f64 v[60:61], v[2:3], v[36:37]
-; GFX950-NEXT: v_cmp_u_f64_e64 s[0:1], v[2:3], v[36:37]
-; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:120
-; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:116
-; GFX950-NEXT: s_waitcnt vmcnt(25)
-; GFX950-NEXT: v_max_f64 v[62:63], v[4:5], v[38:39]
-; GFX950-NEXT: v_cmp_u_f64_e64 s[2:3], v[4:5], v[38:39]
-; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:128
-; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:124
-; GFX950-NEXT: v_mov_b32_e32 v2, 0x7ff80000
-; GFX950-NEXT: s_waitcnt vmcnt(25)
-; GFX950-NEXT: v_max_f64 v[0:1], v[6:7], v[56:57]
-; GFX950-NEXT: v_cmp_u_f64_e64 s[4:5], v[6:7], v[56:57]
+; GFX950-NEXT: v_mov_b32_e32 v60, 0x7ff80000
; GFX950-NEXT: s_waitcnt vmcnt(23)
-; GFX950-NEXT: v_max_f64 v[56:57], v[8:9], v[46:47]
-; GFX950-NEXT: v_cndmask_b32_e64 v58, v58, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v59, v59, v2, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
-; GFX950-NEXT: v_cndmask_b32_e64 v6, v0, 0, s[4:5]
-; GFX950-NEXT: v_cndmask_b32_e64 v7, v1, v2, s[4:5]
-; GFX950-NEXT: v_cndmask_b32_e64 v8, v56, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v9, v57, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(21)
-; GFX950-NEXT: v_max_f64 v[0:1], v[10:11], v[44:45]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
-; GFX950-NEXT: v_cndmask_b32_e64 v60, v60, 0, s[0:1]
-; GFX950-NEXT: v_cndmask_b32_e64 v3, v61, v2, s[0:1]
-; GFX950-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v11, v1, v2, vcc
+; GFX950-NEXT: v_max_f64 v[46:47], v[28:29], v[40:41]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[40:41]
+; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:64
+; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:60
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_max_f64 v[58:59], v[6:7], v[56:57]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[0:1], v[6:7], v[56:57]
+; GFX950-NEXT: scratch_load_dword v7, off, s32 offset:56
+; GFX950-NEXT: scratch_load_dword v6, off, s32 offset:52
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_max_f64 v[56:57], v[4:5], v[44:45]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[2:3], v[4:5], v[44:45]
+; GFX950-NEXT: scratch_load_dword v5, off, s32 offset:48
+; GFX950-NEXT: scratch_load_dword v4, off, s32 offset:44
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_max_f64 v[44:45], v[2:3], v[42:43]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[42:43]
+; GFX950-NEXT: scratch_load_dword v3, off, s32 offset:40
+; GFX950-NEXT: scratch_load_dword v2, off, s32 offset:36
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_max_f64 v[42:43], v[0:1], v[52:53]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[6:7], v[0:1], v[52:53]
; GFX950-NEXT: s_waitcnt vmcnt(19)
-; GFX950-NEXT: v_max_f64 v[0:1], v[12:13], v[42:43]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v62, 0, s[2:3]
-; GFX950-NEXT: v_cndmask_b32_e64 v5, v63, v2, s[2:3]
-; GFX950-NEXT: v_cndmask_b32_e64 v12, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v13, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(17)
-; GFX950-NEXT: v_max_f64 v[0:1], v[14:15], v[40:41]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
-; GFX950-NEXT: v_accvgpr_read_b32 v63, a15 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_read_b32 v62, a14 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v14, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v15, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(15)
-; GFX950-NEXT: v_max_f64 v[0:1], v[16:17], v[54:55]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
-; GFX950-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v16, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v17, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(13)
-; GFX950-NEXT: v_max_f64 v[0:1], v[18:19], v[52:53]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
-; GFX950-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse
+; GFX950-NEXT: v_max_f64 v[0:1], v[30:31], v[54:55]
+; GFX950-NEXT: s_waitcnt vmcnt(18)
+; GFX950-NEXT: v_max_f64 v[52:53], v[26:27], v[48:49]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[8:9], v[30:31], v[54:55]
+; GFX950-NEXT: v_cndmask_b32_e32 v29, v47, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v28, v46, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[48:49]
+; GFX950-NEXT: v_cndmask_b32_e64 v31, v1, v60, s[8:9]
+; GFX950-NEXT: v_cndmask_b32_e64 v30, v0, 0, s[8:9]
+; GFX950-NEXT: v_cndmask_b32_e32 v27, v53, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v26, v52, 0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(16)
+; GFX950-NEXT: v_max_f64 v[0:1], v[24:25], v[50:51]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[50:51]
; GFX950-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v18, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v19, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(11)
-; GFX950-NEXT: v_max_f64 v[0:1], v[20:21], v[50:51]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
; GFX950-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v20, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v21, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(9)
-; GFX950-NEXT: v_max_f64 v[0:1], v[22:23], v[48:49]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[48:49]
-; GFX950-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e32 v25, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v24, v0, 0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(14)
+; GFX950-NEXT: v_max_f64 v[0:1], v[22:23], v[38:39]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[38:39]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v23, v1, v60, vcc
; GFX950-NEXT: v_cndmask_b32_e64 v22, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v23, v1, v2, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(12)
+; GFX950-NEXT: v_max_f64 v[0:1], v[20:21], v[36:37]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[36:37]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v21, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v20, v0, 0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(10)
+; GFX950-NEXT: v_max_f64 v[0:1], v[18:19], v[34:35]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[34:35]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v19, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v18, v0, 0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(8)
+; GFX950-NEXT: v_max_f64 v[0:1], v[16:17], v[32:33]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[32:33]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v17, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v16, v0, 0, vcc
; GFX950-NEXT: s_waitcnt vmcnt(6)
-; GFX950-NEXT: v_max_f64 v[0:1], v[24:25], v[34:35]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[34:35]
-; GFX950-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse
+; GFX950-NEXT: v_max_f64 v[0:1], v[14:15], v[40:41]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
; GFX950-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v24, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v25, v1, v2, vcc
; GFX950-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v14, v0, 0, vcc
; GFX950-NEXT: s_waitcnt vmcnt(4)
-; GFX950-NEXT: v_max_f64 v[0:1], v[26:27], v[32:33]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[32:33]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v26, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v27, v1, v2, vcc
+; GFX950-NEXT: v_max_f64 v[0:1], v[12:13], v[6:7]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[6:7]
+; GFX950-NEXT: v_cndmask_b32_e64 v7, v59, v60, s[0:1]
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v58, 0, s[0:1]
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v12, v0, 0, vcc
; GFX950-NEXT: s_waitcnt vmcnt(2)
-; GFX950-NEXT: v_max_f64 v[0:1], v[28:29], v[36:37]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[36:37]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v28, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v29, v1, v2, vcc
+; GFX950-NEXT: v_max_f64 v[0:1], v[10:11], v[4:5]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[4:5]
+; GFX950-NEXT: v_cndmask_b32_e64 v5, v57, v60, s[2:3]
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v56, 0, s[2:3]
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc
; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_max_f64 v[0:1], v[30:31], v[38:39]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[38:39]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v30, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v31, v1, v2, vcc
-; GFX950-NEXT: v_mov_b32_e32 v0, v58
-; GFX950-NEXT: v_mov_b32_e32 v1, v59
-; GFX950-NEXT: v_mov_b32_e32 v2, v60
+; GFX950-NEXT: v_max_f64 v[0:1], v[8:9], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[2:3]
+; GFX950-NEXT: v_cndmask_b32_e64 v3, v45, v60, s[4:5]
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v44, 0, s[4:5]
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v8, v0, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v1, v43, v60, s[6:7]
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v42, 0, s[6:7]
; GFX950-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v16f64:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
index dfd67873c3b86..b119dd425463b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
@@ -561,17 +561,17 @@ define <2 x double> @v_minimum_v2f64(<2 x double> %src0, <2 x double> %src1) {
; GFX950-LABEL: v_minimum_v2f64:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX950-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX950-NEXT: v_min_f64 v[8:9], v[2:3], v[6:7]
+; GFX950-NEXT: v_mov_b32_e32 v10, 0x7ff80000
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT: v_min_f64 v[6:7], v[0:1], v[4:5]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v10, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64:
@@ -630,12 +630,19 @@ define <2 x double> @v_minimum_v2f64__nnan(<2 x double> %src0, <2 x double> %src
; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v2f64__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f64__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f64__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64__nnan:
; GFX10: ; %bb.0:
@@ -711,17 +718,17 @@ define <2 x double> @v_minimum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
; GFX950-LABEL: v_minimum_v2f64__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX950-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX950-NEXT: v_min_f64 v[8:9], v[2:3], v[6:7]
+; GFX950-NEXT: v_mov_b32_e32 v10, 0x7ff80000
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT: v_min_f64 v[6:7], v[0:1], v[4:5]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v10, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64__nsz:
@@ -780,12 +787,19 @@ define <2 x double> @v_minimum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double>
; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v2f64__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f64__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f64__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64__nnan_nsz:
; GFX10: ; %bb.0:
@@ -1008,22 +1022,22 @@ define <3 x double> @v_minimum_v3f64(<3 x double> %src0, <3 x double> %src1) {
; GFX950-LABEL: v_minimum_v3f64:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX950-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT: v_min_f64 v[12:13], v[4:5], v[10:11]
+; GFX950-NEXT: v_mov_b32_e32 v14, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT: v_min_f64 v[10:11], v[2:3], v[8:9]
; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v12, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT: v_min_f64 v[8:9], v[0:1], v[6:7]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v11, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v10, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX950-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64:
@@ -1092,13 +1106,21 @@ define <3 x double> @v_minimum_v3f64__nnan(<3 x double> %src0, <3 x double> %src
; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v3f64__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f64__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f64__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64__nnan:
; GFX10: ; %bb.0:
@@ -1189,22 +1211,22 @@ define <3 x double> @v_minimum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
; GFX950-LABEL: v_minimum_v3f64__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX950-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT: v_min_f64 v[12:13], v[4:5], v[10:11]
+; GFX950-NEXT: v_mov_b32_e32 v14, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT: v_min_f64 v[10:11], v[2:3], v[8:9]
; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v12, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT: v_min_f64 v[8:9], v[0:1], v[6:7]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v11, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v10, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX950-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64__nsz:
@@ -1273,13 +1295,21 @@ define <3 x double> @v_minimum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double>
; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v3f64__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f64__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f64__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64__nnan_nsz:
; GFX10: ; %bb.0:
@@ -1382,27 +1412,27 @@ define <4 x double> @v_minimum_v4f64(<4 x double> %src0, <4 x double> %src1) {
; GFX950-LABEL: v_minimum_v4f64:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX950-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT: v_min_f64 v[16:17], v[6:7], v[14:15]
+; GFX950-NEXT: v_mov_b32_e32 v18, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT: v_min_f64 v[14:15], v[4:5], v[12:13]
; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX950-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13]
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v16, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT: v_min_f64 v[12:13], v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v15, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v14, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT: v_min_f64 v[10:11], v[0:1], v[8:9]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v13, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v12, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX950-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v11, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64:
@@ -1482,14 +1512,23 @@ define <4 x double> @v_minimum_v4f64__nnan(<4 x double> %src0, <4 x double> %src
; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v4f64__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
-; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
-; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f64__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX900-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f64__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64__nnan:
; GFX10: ; %bb.0:
@@ -1595,27 +1634,27 @@ define <4 x double> @v_minimum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
; GFX950-LABEL: v_minimum_v4f64__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX950-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT: v_min_f64 v[16:17], v[6:7], v[14:15]
+; GFX950-NEXT: v_mov_b32_e32 v18, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT: v_min_f64 v[14:15], v[4:5], v[12:13]
; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX950-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13]
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v16, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT: v_min_f64 v[12:13], v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v15, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v14, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT: v_min_f64 v[10:11], v[0:1], v[8:9]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v13, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v12, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX950-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v11, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64__nsz:
@@ -1695,14 +1734,23 @@ define <4 x double> @v_minimum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double>
; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v4f64__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
-; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
-; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f64__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX900-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f64__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64__nnan_nsz:
; GFX10: ; %bb.0:
@@ -1864,43 +1912,43 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) {
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: scratch_load_dword v31, off, s32
; GFX950-NEXT: v_mov_b32_e32 v54, 0x7ff80000
-; GFX950-NEXT: v_min_f64 v[32:33], v[0:1], v[16:17]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX950-NEXT: v_min_f64 v[34:35], v[2:3], v[18:19]
-; GFX950-NEXT: v_min_f64 v[36:37], v[4:5], v[20:21]
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX950-NEXT: v_min_f64 v[32:33], v[12:13], v[28:29]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX950-NEXT: v_min_f64 v[34:35], v[10:11], v[26:27]
+; GFX950-NEXT: v_min_f64 v[36:37], v[8:9], v[24:25]
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v33, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v12, v32, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
; GFX950-NEXT: v_min_f64 v[38:39], v[6:7], v[22:23]
-; GFX950-NEXT: v_min_f64 v[48:49], v[8:9], v[24:25]
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
-; GFX950-NEXT: v_min_f64 v[50:51], v[10:11], v[26:27]
-; GFX950-NEXT: v_min_f64 v[52:53], v[12:13], v[28:29]
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
-; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_min_f64 v[16:17], v[14:15], v[30:31]
-; GFX950-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
+; GFX950-NEXT: v_min_f64 v[48:49], v[4:5], v[20:21]
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v35, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v10, v34, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
+; GFX950-NEXT: v_min_f64 v[50:51], v[2:3], v[18:19]
+; GFX950-NEXT: v_min_f64 v[52:53], v[0:1], v[16:17]
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v37, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v8, v36, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v49, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v48, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v51, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v50, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_min_f64 v[16:17], v[14:15], v[30:31]
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v53, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v52, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
; GFX950-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v8f64:
@@ -2371,152 +2419,144 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX950-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse
-; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:8
-; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:4
-; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:16
-; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:12
-; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:24
-; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:20
+; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:120
+; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:116
; GFX950-NEXT: scratch_load_dword v57, off, s32 offset:32
; GFX950-NEXT: scratch_load_dword v56, off, s32 offset:28
-; GFX950-NEXT: scratch_load_dword v47, off, s32 offset:40
-; GFX950-NEXT: scratch_load_dword v46, off, s32 offset:36
-; GFX950-NEXT: scratch_load_dword v45, off, s32 offset:48
-; GFX950-NEXT: scratch_load_dword v44, off, s32 offset:44
-; GFX950-NEXT: scratch_load_dword v43, off, s32 offset:56
-; GFX950-NEXT: scratch_load_dword v42, off, s32 offset:52
-; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:64
-; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:60
-; GFX950-NEXT: scratch_load_dword v55, off, s32 offset:72
-; GFX950-NEXT: scratch_load_dword v54, off, s32 offset:68
-; GFX950-NEXT: scratch_load_dword v53, off, s32 offset:80
-; GFX950-NEXT: scratch_load_dword v52, off, s32 offset:76
-; GFX950-NEXT: scratch_load_dword v51, off, s32 offset:88
-; GFX950-NEXT: scratch_load_dword v50, off, s32 offset:84
-; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:96
-; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:92
+; GFX950-NEXT: scratch_load_dword v45, off, s32 offset:24
+; GFX950-NEXT: scratch_load_dword v44, off, s32 offset:20
+; GFX950-NEXT: scratch_load_dword v43, off, s32 offset:16
+; GFX950-NEXT: scratch_load_dword v42, off, s32 offset:12
+; GFX950-NEXT: scratch_load_dword v53, off, s32 offset:8
+; GFX950-NEXT: scratch_load_dword v52, off, s32 offset:4
+; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:112
; GFX950-NEXT: scratch_load_dword v31, off, s32
-; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:104
-; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:100
+; GFX950-NEXT: scratch_load_dword v55, off, s32 offset:128
+; GFX950-NEXT: scratch_load_dword v54, off, s32 offset:124
+; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:108
+; GFX950-NEXT: scratch_load_dword v51, off, s32 offset:104
+; GFX950-NEXT: scratch_load_dword v50, off, s32 offset:100
+; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:96
+; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:92
+; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:88
+; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:84
+; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:80
+; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:76
+; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:72
+; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:68
+; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse
-; GFX950-NEXT: s_waitcnt vmcnt(25)
-; GFX950-NEXT: v_min_f64 v[58:59], v[0:1], v[32:33]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[32:33]
-; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:112
-; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:108
-; GFX950-NEXT: s_waitcnt vmcnt(25)
-; GFX950-NEXT: v_min_f64 v[60:61], v[2:3], v[36:37]
-; GFX950-NEXT: v_cmp_u_f64_e64 s[0:1], v[2:3], v[36:37]
-; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:120
-; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:116
-; GFX950-NEXT: s_waitcnt vmcnt(25)
-; GFX950-NEXT: v_min_f64 v[62:63], v[4:5], v[38:39]
-; GFX950-NEXT: v_cmp_u_f64_e64 s[2:3], v[4:5], v[38:39]
-; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:128
-; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:124
-; GFX950-NEXT: v_mov_b32_e32 v2, 0x7ff80000
-; GFX950-NEXT: s_waitcnt vmcnt(25)
-; GFX950-NEXT: v_min_f64 v[0:1], v[6:7], v[56:57]
-; GFX950-NEXT: v_cmp_u_f64_e64 s[4:5], v[6:7], v[56:57]
+; GFX950-NEXT: v_mov_b32_e32 v60, 0x7ff80000
; GFX950-NEXT: s_waitcnt vmcnt(23)
-; GFX950-NEXT: v_min_f64 v[56:57], v[8:9], v[46:47]
-; GFX950-NEXT: v_cndmask_b32_e64 v58, v58, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v59, v59, v2, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
-; GFX950-NEXT: v_cndmask_b32_e64 v6, v0, 0, s[4:5]
-; GFX950-NEXT: v_cndmask_b32_e64 v7, v1, v2, s[4:5]
-; GFX950-NEXT: v_cndmask_b32_e64 v8, v56, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v9, v57, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(21)
-; GFX950-NEXT: v_min_f64 v[0:1], v[10:11], v[44:45]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
-; GFX950-NEXT: v_cndmask_b32_e64 v60, v60, 0, s[0:1]
-; GFX950-NEXT: v_cndmask_b32_e64 v3, v61, v2, s[0:1]
-; GFX950-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v11, v1, v2, vcc
+; GFX950-NEXT: v_min_f64 v[46:47], v[28:29], v[40:41]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[40:41]
+; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:64
+; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:60
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_min_f64 v[58:59], v[6:7], v[56:57]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[0:1], v[6:7], v[56:57]
+; GFX950-NEXT: scratch_load_dword v7, off, s32 offset:56
+; GFX950-NEXT: scratch_load_dword v6, off, s32 offset:52
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_min_f64 v[56:57], v[4:5], v[44:45]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[2:3], v[4:5], v[44:45]
+; GFX950-NEXT: scratch_load_dword v5, off, s32 offset:48
+; GFX950-NEXT: scratch_load_dword v4, off, s32 offset:44
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_min_f64 v[44:45], v[2:3], v[42:43]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[42:43]
+; GFX950-NEXT: scratch_load_dword v3, off, s32 offset:40
+; GFX950-NEXT: scratch_load_dword v2, off, s32 offset:36
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_min_f64 v[42:43], v[0:1], v[52:53]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[6:7], v[0:1], v[52:53]
; GFX950-NEXT: s_waitcnt vmcnt(19)
-; GFX950-NEXT: v_min_f64 v[0:1], v[12:13], v[42:43]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v62, 0, s[2:3]
-; GFX950-NEXT: v_cndmask_b32_e64 v5, v63, v2, s[2:3]
-; GFX950-NEXT: v_cndmask_b32_e64 v12, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v13, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(17)
-; GFX950-NEXT: v_min_f64 v[0:1], v[14:15], v[40:41]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
-; GFX950-NEXT: v_accvgpr_read_b32 v63, a15 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_read_b32 v62, a14 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v14, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v15, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(15)
-; GFX950-NEXT: v_min_f64 v[0:1], v[16:17], v[54:55]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
-; GFX950-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v16, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v17, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(13)
-; GFX950-NEXT: v_min_f64 v[0:1], v[18:19], v[52:53]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
-; GFX950-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse
+; GFX950-NEXT: v_min_f64 v[0:1], v[30:31], v[54:55]
+; GFX950-NEXT: s_waitcnt vmcnt(18)
+; GFX950-NEXT: v_min_f64 v[52:53], v[26:27], v[48:49]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[8:9], v[30:31], v[54:55]
+; GFX950-NEXT: v_cndmask_b32_e32 v29, v47, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v28, v46, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[48:49]
+; GFX950-NEXT: v_cndmask_b32_e64 v31, v1, v60, s[8:9]
+; GFX950-NEXT: v_cndmask_b32_e64 v30, v0, 0, s[8:9]
+; GFX950-NEXT: v_cndmask_b32_e32 v27, v53, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v26, v52, 0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(16)
+; GFX950-NEXT: v_min_f64 v[0:1], v[24:25], v[50:51]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[50:51]
; GFX950-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v18, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v19, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(11)
-; GFX950-NEXT: v_min_f64 v[0:1], v[20:21], v[50:51]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
; GFX950-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v20, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v21, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(9)
-; GFX950-NEXT: v_min_f64 v[0:1], v[22:23], v[48:49]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[48:49]
-; GFX950-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e32 v25, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v24, v0, 0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(14)
+; GFX950-NEXT: v_min_f64 v[0:1], v[22:23], v[38:39]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[38:39]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v23, v1, v60, vcc
; GFX950-NEXT: v_cndmask_b32_e64 v22, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v23, v1, v2, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(12)
+; GFX950-NEXT: v_min_f64 v[0:1], v[20:21], v[36:37]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[36:37]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v21, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v20, v0, 0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(10)
+; GFX950-NEXT: v_min_f64 v[0:1], v[18:19], v[34:35]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[34:35]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v19, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v18, v0, 0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(8)
+; GFX950-NEXT: v_min_f64 v[0:1], v[16:17], v[32:33]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[32:33]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v17, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v16, v0, 0, vcc
; GFX950-NEXT: s_waitcnt vmcnt(6)
-; GFX950-NEXT: v_min_f64 v[0:1], v[24:25], v[34:35]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[34:35]
-; GFX950-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse
+; GFX950-NEXT: v_min_f64 v[0:1], v[14:15], v[40:41]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
; GFX950-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v24, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v25, v1, v2, vcc
; GFX950-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v14, v0, 0, vcc
; GFX950-NEXT: s_waitcnt vmcnt(4)
-; GFX950-NEXT: v_min_f64 v[0:1], v[26:27], v[32:33]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[32:33]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v26, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v27, v1, v2, vcc
+; GFX950-NEXT: v_min_f64 v[0:1], v[12:13], v[6:7]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[6:7]
+; GFX950-NEXT: v_cndmask_b32_e64 v7, v59, v60, s[0:1]
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v58, 0, s[0:1]
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v12, v0, 0, vcc
; GFX950-NEXT: s_waitcnt vmcnt(2)
-; GFX950-NEXT: v_min_f64 v[0:1], v[28:29], v[36:37]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[36:37]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v28, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v29, v1, v2, vcc
+; GFX950-NEXT: v_min_f64 v[0:1], v[10:11], v[4:5]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[4:5]
+; GFX950-NEXT: v_cndmask_b32_e64 v5, v57, v60, s[2:3]
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v56, 0, s[2:3]
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc
; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_min_f64 v[0:1], v[30:31], v[38:39]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[38:39]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v30, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v31, v1, v2, vcc
-; GFX950-NEXT: v_mov_b32_e32 v0, v58
-; GFX950-NEXT: v_mov_b32_e32 v1, v59
-; GFX950-NEXT: v_mov_b32_e32 v2, v60
+; GFX950-NEXT: v_min_f64 v[0:1], v[8:9], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[2:3]
+; GFX950-NEXT: v_cndmask_b32_e64 v3, v45, v60, s[4:5]
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v44, 0, s[4:5]
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v8, v0, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v1, v43, v60, s[6:7]
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v42, 0, s[6:7]
; GFX950-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v16f64:
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
index 5b2213592f495..d947f543a1a0a 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
@@ -5312,18 +5312,15 @@ define amdgpu_kernel void @constant_zextload_v2i1_to_v2i64(ptr addrspace(1) %out
; GFX1250-LABEL: constant_zextload_v2i1_to_v2i64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: global_load_u8 v0, v1, s[2:3]
+; GFX1250-NEXT: global_load_u8 v0, v3, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_and_b32_e32 v2, 0xffff, v0
-; GFX1250-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_lshrrev_b32_e32 v2, 1, v2
-; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1250-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1]
+; GFX1250-NEXT: v_and_b32_e32 v1, 0xffff, v0
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_dual_lshrrev_b32 v2, 1, v1 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
+; GFX1250-NEXT: v_mov_b32_e32 v1, v3
+; GFX1250-NEXT: global_store_b128 v3, v[0:3], s[0:1]
; GFX1250-NEXT: s_endpgm
%load = load <2 x i1>, ptr addrspace(4) %in
%ext = zext <2 x i1> %load to <2 x i64>
@@ -5531,22 +5528,19 @@ define amdgpu_kernel void @constant_zextload_v3i1_to_v3i64(ptr addrspace(1) %out
; GFX1250-LABEL: constant_zextload_v3i1_to_v3i64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1250-NEXT: v_mov_b32_e32 v5, 0
+; GFX1250-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: global_load_u8 v0, v5, s[2:3]
+; GFX1250-NEXT: global_load_u8 v2, v3, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_and_b32_e32 v1, 0xffff, v0
-; GFX1250-NEXT: v_bfe_u32 v2, v0, 1, 1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_dual_lshrrev_b32 v4, 2, v1 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT: v_dual_mov_b32 v1, v5 :: v_dual_mov_b32 v3, v5
-; GFX1250-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v2
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1250-NEXT: v_dual_mov_b32 v1, v3 :: v_dual_lshrrev_b32 v4, 2, v0
+; GFX1250-NEXT: v_dual_mov_b32 v5, v3 :: v_dual_bitop2_b32 v0, 1, v2 bitop3:0x40
+; GFX1250-NEXT: v_bfe_u32 v2, v2, 1, 1
; GFX1250-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX1250-NEXT: s_clause 0x1
-; GFX1250-NEXT: global_store_b64 v5, v[4:5], s[0:1] offset:16
-; GFX1250-NEXT: global_store_b128 v5, v[0:3], s[0:1]
+; GFX1250-NEXT: global_store_b64 v3, v[4:5], s[0:1] offset:16
+; GFX1250-NEXT: global_store_b128 v3, v[0:3], s[0:1]
; GFX1250-NEXT: s_endpgm
%load = load <3 x i1>, ptr addrspace(4) %in
%ext = zext <3 x i1> %load to <3 x i64>
@@ -5800,27 +5794,20 @@ define amdgpu_kernel void @constant_zextload_v4i1_to_v4i64(ptr addrspace(1) %out
; GFX1250-LABEL: constant_zextload_v4i1_to_v4i64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: global_load_u8 v0, v1, s[2:3]
+; GFX1250-NEXT: global_load_u8 v6, v3, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_readfirstlane_b32 s2, v0
-; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10002
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1250-NEXT: v_lshrrev_b32_e32 v2, 3, v0
-; GFX1250-NEXT: s_and_b32 s3, 0xffff, s3
-; GFX1250-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v0, s3
-; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10001
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1250-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX1250-NEXT: s_and_b32 s2, s2, 1
-; GFX1250-NEXT: s_and_b32 s3, 0xffff, s3
-; GFX1250-NEXT: s_and_b32 s2, 0xffff, s2
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16
-; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v2, s3
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1]
+; GFX1250-NEXT: v_and_b32_e32 v2, 0xffff, v6
+; GFX1250-NEXT: v_dual_mov_b32 v1, v3 :: v_dual_bitop2_b32 v4, 1, v6 bitop3:0x40
+; GFX1250-NEXT: v_dual_mov_b32 v7, v3 :: v_dual_mov_b32 v5, v3
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX1250-NEXT: v_lshrrev_b32_e32 v2, 3, v2
+; GFX1250-NEXT: v_bfe_u32 v0, v6, 2, 1
+; GFX1250-NEXT: v_bfe_u32 v6, v6, 1, 1
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: global_store_b128 v3, v[0:3], s[0:1] offset:16
+; GFX1250-NEXT: global_store_b128 v3, v[4:7], s[0:1]
; GFX1250-NEXT: s_endpgm
%load = load <4 x i1>, ptr addrspace(4) %in
%ext = zext <4 x i1> %load to <4 x i64>
@@ -6136,28 +6123,28 @@ define amdgpu_kernel void @constant_zextload_v8i1_to_v8i64(ptr addrspace(1) %out
; GFX1250-LABEL: constant_zextload_v8i1_to_v8i64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: global_load_u8 v12, v1, s[2:3]
+; GFX1250-NEXT: global_load_u8 v12, v3, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v12
; GFX1250-NEXT: v_bfe_u32 v6, v12, 5, 1
; GFX1250-NEXT: v_bfe_u32 v4, v12, 4, 1
; GFX1250-NEXT: v_bfe_u32 v10, v12, 3, 1
; GFX1250-NEXT: v_bfe_u32 v8, v12, 2, 1
-; GFX1250-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_lshrrev_b32 v2, 7, v0
-; GFX1250-NEXT: v_mov_b32_e32 v5, v1
+; GFX1250-NEXT: v_dual_mov_b32 v1, v3 :: v_dual_lshrrev_b32 v2, 7, v0
+; GFX1250-NEXT: v_mov_b32_e32 v7, v3
; GFX1250-NEXT: v_bfe_u32 v0, v0, 6, 1
-; GFX1250-NEXT: v_dual_mov_b32 v7, v1 :: v_dual_mov_b32 v9, v1
-; GFX1250-NEXT: v_dual_mov_b32 v11, v1 :: v_dual_mov_b32 v13, v1
-; GFX1250-NEXT: v_mov_b32_e32 v15, v1
+; GFX1250-NEXT: v_dual_mov_b32 v5, v3 :: v_dual_mov_b32 v11, v3
+; GFX1250-NEXT: v_dual_mov_b32 v9, v3 :: v_dual_mov_b32 v15, v3
+; GFX1250-NEXT: v_mov_b32_e32 v13, v3
; GFX1250-NEXT: v_bfe_u32 v14, v12, 1, 1
; GFX1250-NEXT: v_and_b32_e32 v12, 1, v12
; GFX1250-NEXT: s_clause 0x3
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48
-; GFX1250-NEXT: global_store_b128 v1, v[4:7], s[0:1] offset:32
-; GFX1250-NEXT: global_store_b128 v1, v[8:11], s[0:1] offset:16
-; GFX1250-NEXT: global_store_b128 v1, v[12:15], s[0:1]
+; GFX1250-NEXT: global_store_b128 v3, v[0:3], s[0:1] offset:48
+; GFX1250-NEXT: global_store_b128 v3, v[4:7], s[0:1] offset:32
+; GFX1250-NEXT: global_store_b128 v3, v[8:11], s[0:1] offset:16
+; GFX1250-NEXT: global_store_b128 v3, v[12:15], s[0:1]
; GFX1250-NEXT: s_endpgm
%load = load <8 x i1>, ptr addrspace(4) %in
%ext = zext <8 x i1> %load to <8 x i64>
@@ -6374,35 +6361,35 @@ define amdgpu_kernel void @constant_sextload_v8i1_to_v8i64(ptr addrspace(1) %out
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_readfirstlane_b32 s3, v0
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_mov_b32_e32 v10, s3
-; GFX1250-NEXT: s_lshr_b32 s2, s3, 6
-; GFX1250-NEXT: s_lshr_b32 s4, s3, 7
-; GFX1250-NEXT: s_lshr_b32 s6, s3, 4
-; GFX1250-NEXT: s_lshr_b32 s8, s3, 5
-; GFX1250-NEXT: s_lshr_b32 s10, s3, 2
-; GFX1250-NEXT: s_lshr_b32 s12, s3, 3
-; GFX1250-NEXT: s_lshr_b32 s14, s3, 1
-; GFX1250-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
-; GFX1250-NEXT: v_bfe_i32 v12, v10, 0, 1
-; GFX1250-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
+; GFX1250-NEXT: v_mov_b32_e32 v4, s3
+; GFX1250-NEXT: s_lshr_b32 s12, s3, 6
+; GFX1250-NEXT: s_lshr_b32 s14, s3, 7
+; GFX1250-NEXT: s_lshr_b32 s8, s3, 4
+; GFX1250-NEXT: s_lshr_b32 s10, s3, 5
+; GFX1250-NEXT: s_lshr_b32 s4, s3, 2
+; GFX1250-NEXT: s_lshr_b32 s6, s3, 3
+; GFX1250-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
+; GFX1250-NEXT: s_lshr_b32 s2, s3, 1
; GFX1250-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX1250-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
-; GFX1250-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v5, s7
-; GFX1250-NEXT: v_dual_mov_b32 v6, s8 :: v_dual_mov_b32 v7, s9
-; GFX1250-NEXT: v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v9, s11
-; GFX1250-NEXT: v_dual_mov_b32 v10, s12 :: v_dual_mov_b32 v11, s13
-; GFX1250-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_ashrrev_i32 v13, 31, v12
-; GFX1250-NEXT: v_mov_b32_e32 v15, s15
+; GFX1250-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
+; GFX1250-NEXT: v_bfe_i32 v4, v4, 0, 1
+; GFX1250-NEXT: v_mov_b64_e32 v[12:13], s[12:13]
+; GFX1250-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
+; GFX1250-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
+; GFX1250-NEXT: v_mov_b64_e32 v[8:9], s[8:9]
+; GFX1250-NEXT: v_mov_b64_e32 v[10:11], s[10:11]
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
+; GFX1250-NEXT: v_mov_b64_e32 v[6:7], s[2:3]
+; GFX1250-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GFX1250-NEXT: s_clause 0x3
-; GFX1250-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48
-; GFX1250-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:32
-; GFX1250-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16
-; GFX1250-NEXT: global_store_b128 v16, v[12:15], s[0:1]
+; GFX1250-NEXT: global_store_b128 v16, v[12:15], s[0:1] offset:48
+; GFX1250-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:32
+; GFX1250-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:16
+; GFX1250-NEXT: global_store_b128 v16, v[4:7], s[0:1]
; GFX1250-NEXT: s_endpgm
%load = load <8 x i1>, ptr addrspace(4) %in
%ext = sext <8 x i1> %load to <8 x i64>
@@ -6696,44 +6683,43 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
; GFX1250-LABEL: constant_zextload_v16i1_to_v16i64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: v_mov_b32_e32 v3, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: global_load_u16 v12, v1, s[2:3]
+; GFX1250-NEXT: global_load_u16 v10, v3, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_and_b32_e32 v22, 0xffff, v12
-; GFX1250-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_bitop2_b32 v28, 1, v12 bitop3:0x40
-; GFX1250-NEXT: v_mov_b32_e32 v5, v1
-; GFX1250-NEXT: v_bfe_u32 v0, v12, 10, 1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX1250-NEXT: v_bfe_u32 v2, v22, 11, 1
-; GFX1250-NEXT: v_dual_mov_b32 v7, v1 :: v_dual_mov_b32 v9, v1
-; GFX1250-NEXT: v_bfe_u32 v6, v12, 9, 1
-; GFX1250-NEXT: v_bfe_u32 v4, v22, 8, 1
-; GFX1250-NEXT: v_dual_mov_b32 v11, v1 :: v_dual_mov_b32 v13, v1
-; GFX1250-NEXT: v_dual_mov_b32 v31, v1 :: v_dual_lshrrev_b32 v10, 15, v22
-; GFX1250-NEXT: v_bfe_u32 v8, v22, 14, 1
-; GFX1250-NEXT: v_dual_mov_b32 v15, v1 :: v_dual_mov_b32 v17, v1
-; GFX1250-NEXT: v_bfe_u32 v14, v12, 13, 1
-; GFX1250-NEXT: v_bfe_u32 v18, v12, 7, 1
-; GFX1250-NEXT: v_bfe_u32 v26, v12, 3, 1
-; GFX1250-NEXT: v_bfe_u32 v30, v12, 1, 1
-; GFX1250-NEXT: v_bfe_u32 v24, v12, 2, 1
-; GFX1250-NEXT: v_bfe_u32 v20, v12, 4, 1
-; GFX1250-NEXT: v_bfe_u32 v16, v12, 6, 1
-; GFX1250-NEXT: v_bfe_u32 v12, v12, 12, 1
-; GFX1250-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v21, v1
-; GFX1250-NEXT: v_dual_mov_b32 v23, v1 :: v_dual_mov_b32 v25, v1
-; GFX1250-NEXT: v_dual_mov_b32 v27, v1 :: v_dual_mov_b32 v29, v1
+; GFX1250-NEXT: v_and_b32_e32 v22, 0xffff, v10
+; GFX1250-NEXT: v_bfe_u32 v6, v10, 13, 1
+; GFX1250-NEXT: v_bfe_u32 v4, v10, 12, 1
+; GFX1250-NEXT: v_bfe_u32 v30, v10, 1, 1
+; GFX1250-NEXT: v_bfe_u32 v26, v10, 3, 1
+; GFX1250-NEXT: v_dual_mov_b32 v1, v3 :: v_dual_lshrrev_b32 v2, 15, v22
+; GFX1250-NEXT: v_mov_b32_e32 v7, v3
+; GFX1250-NEXT: v_bfe_u32 v0, v22, 14, 1
+; GFX1250-NEXT: v_dual_mov_b32 v5, v3 :: v_dual_mov_b32 v11, v3
+; GFX1250-NEXT: v_dual_mov_b32 v9, v3 :: v_dual_mov_b32 v15, v3
+; GFX1250-NEXT: v_dual_mov_b32 v29, v3 :: v_dual_bitop2_b32 v28, 1, v10 bitop3:0x40
+; GFX1250-NEXT: v_bfe_u32 v18, v10, 7, 1
+; GFX1250-NEXT: v_bfe_u32 v14, v10, 9, 1
+; GFX1250-NEXT: v_bfe_u32 v8, v10, 10, 1
+; GFX1250-NEXT: v_bfe_u32 v16, v10, 6, 1
+; GFX1250-NEXT: v_bfe_u32 v20, v10, 4, 1
+; GFX1250-NEXT: v_bfe_u32 v24, v10, 2, 1
+; GFX1250-NEXT: v_bfe_u32 v10, v22, 11, 1
+; GFX1250-NEXT: v_dual_mov_b32 v13, v3 :: v_dual_mov_b32 v19, v3
+; GFX1250-NEXT: v_bfe_u32 v12, v22, 8, 1
+; GFX1250-NEXT: v_dual_mov_b32 v17, v3 :: v_dual_mov_b32 v23, v3
+; GFX1250-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v27, v3
+; GFX1250-NEXT: v_dual_mov_b32 v25, v3 :: v_dual_mov_b32 v31, v3
; GFX1250-NEXT: v_bfe_u32 v22, v22, 5, 1
; GFX1250-NEXT: s_clause 0x7
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:80
-; GFX1250-NEXT: global_store_b128 v1, v[4:7], s[0:1] offset:64
-; GFX1250-NEXT: global_store_b128 v1, v[8:11], s[0:1] offset:112
-; GFX1250-NEXT: global_store_b128 v1, v[12:15], s[0:1] offset:96
-; GFX1250-NEXT: global_store_b128 v1, v[16:19], s[0:1] offset:48
-; GFX1250-NEXT: global_store_b128 v1, v[20:23], s[0:1] offset:32
-; GFX1250-NEXT: global_store_b128 v1, v[24:27], s[0:1] offset:16
-; GFX1250-NEXT: global_store_b128 v1, v[28:31], s[0:1]
+; GFX1250-NEXT: global_store_b128 v3, v[0:3], s[0:1] offset:112
+; GFX1250-NEXT: global_store_b128 v3, v[4:7], s[0:1] offset:96
+; GFX1250-NEXT: global_store_b128 v3, v[8:11], s[0:1] offset:80
+; GFX1250-NEXT: global_store_b128 v3, v[12:15], s[0:1] offset:64
+; GFX1250-NEXT: global_store_b128 v3, v[16:19], s[0:1] offset:48
+; GFX1250-NEXT: global_store_b128 v3, v[20:23], s[0:1] offset:32
+; GFX1250-NEXT: global_store_b128 v3, v[24:27], s[0:1] offset:16
+; GFX1250-NEXT: global_store_b128 v3, v[28:31], s[0:1]
; GFX1250-NEXT: s_endpgm
%load = load <16 x i1>, ptr addrspace(4) %in
%ext = zext <16 x i1> %load to <16 x i64>
@@ -7106,65 +7092,64 @@ define amdgpu_kernel void @constant_sextload_v16i1_to_v16i64(ptr addrspace(1) %o
; GFX1250-NEXT: global_load_u16 v0, v32, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_readfirstlane_b32 s3, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_mov_b32_e32 v28, s3
-; GFX1250-NEXT: s_lshr_b32 s2, s3, 14
-; GFX1250-NEXT: s_lshr_b32 s4, s3, 15
-; GFX1250-NEXT: s_lshr_b32 s10, s3, 10
-; GFX1250-NEXT: s_lshr_b32 s12, s3, 11
-; GFX1250-NEXT: s_lshr_b32 s6, s3, 12
-; GFX1250-NEXT: s_lshr_b32 s8, s3, 13
-; GFX1250-NEXT: s_lshr_b32 s14, s3, 8
-; GFX1250-NEXT: s_lshr_b32 s16, s3, 9
-; GFX1250-NEXT: s_lshr_b32 s18, s3, 6
-; GFX1250-NEXT: s_lshr_b32 s20, s3, 7
-; GFX1250-NEXT: s_lshr_b32 s22, s3, 4
-; GFX1250-NEXT: s_lshr_b32 s24, s3, 5
-; GFX1250-NEXT: s_lshr_b32 s26, s3, 2
-; GFX1250-NEXT: s_lshr_b32 s28, s3, 3
-; GFX1250-NEXT: s_lshr_b32 s30, s3, 1
-; GFX1250-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
+; GFX1250-NEXT: s_lshr_b32 s20, s3, 10
+; GFX1250-NEXT: s_lshr_b32 s22, s3, 11
+; GFX1250-NEXT: s_lshr_b32 s28, s3, 14
+; GFX1250-NEXT: s_lshr_b32 s30, s3, 15
+; GFX1250-NEXT: s_lshr_b32 s16, s3, 8
+; GFX1250-NEXT: s_lshr_b32 s18, s3, 9
+; GFX1250-NEXT: s_lshr_b32 s24, s3, 12
+; GFX1250-NEXT: s_lshr_b32 s26, s3, 13
+; GFX1250-NEXT: v_mov_b32_e32 v0, s3
+; GFX1250-NEXT: s_lshr_b32 s12, s3, 6
+; GFX1250-NEXT: s_lshr_b32 s14, s3, 7
+; GFX1250-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
+; GFX1250-NEXT: s_lshr_b32 s8, s3, 4
+; GFX1250-NEXT: s_lshr_b32 s10, s3, 5
+; GFX1250-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
+; GFX1250-NEXT: s_lshr_b32 s4, s3, 2
+; GFX1250-NEXT: s_lshr_b32 s6, s3, 3
; GFX1250-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
+; GFX1250-NEXT: v_mov_b64_e32 v[20:21], s[20:21]
+; GFX1250-NEXT: v_mov_b64_e32 v[28:29], s[28:29]
+; GFX1250-NEXT: v_mov_b64_e32 v[30:31], s[30:31]
+; GFX1250-NEXT: v_mov_b64_e32 v[22:23], s[22:23]
+; GFX1250-NEXT: s_lshr_b32 s2, s3, 1
+; GFX1250-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
+; GFX1250-NEXT: v_mov_b64_e32 v[16:17], s[16:17]
+; GFX1250-NEXT: v_mov_b64_e32 v[24:25], s[24:25]
+; GFX1250-NEXT: v_mov_b64_e32 v[26:27], s[26:27]
+; GFX1250-NEXT: v_mov_b64_e32 v[18:19], s[18:19]
; GFX1250-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX1250-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
-; GFX1250-NEXT: v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v9, s11
-; GFX1250-NEXT: v_dual_mov_b32 v10, s12 :: v_dual_mov_b32 v11, s13
-; GFX1250-NEXT: v_bfe_i32 v28, v28, 0, 1
-; GFX1250-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v5, s7
-; GFX1250-NEXT: v_dual_mov_b32 v6, s8 :: v_dual_mov_b32 v7, s9
-; GFX1250-NEXT: v_dual_mov_b32 v12, s14 :: v_dual_mov_b32 v13, s15
-; GFX1250-NEXT: v_dual_mov_b32 v14, s16 :: v_dual_mov_b32 v15, s17
-; GFX1250-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v16, s18 :: v_dual_mov_b32 v17, s19
-; GFX1250-NEXT: v_dual_mov_b32 v18, s20 :: v_dual_mov_b32 v19, s21
-; GFX1250-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v20, s22 :: v_dual_mov_b32 v21, s23
-; GFX1250-NEXT: v_dual_mov_b32 v22, s24 :: v_dual_mov_b32 v23, s25
-; GFX1250-NEXT: v_dual_mov_b32 v24, s26 :: v_dual_mov_b32 v25, s27
-; GFX1250-NEXT: v_dual_mov_b32 v26, s28 :: v_dual_mov_b32 v27, s29
-; GFX1250-NEXT: v_dual_mov_b32 v30, s30 :: v_dual_mov_b32 v31, s31
+; GFX1250-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
+; GFX1250-NEXT: v_mov_b64_e32 v[12:13], s[12:13]
+; GFX1250-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
+; GFX1250-NEXT: v_bfe_i32 v0, v0, 0, 1
+; GFX1250-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
+; GFX1250-NEXT: v_mov_b64_e32 v[8:9], s[8:9]
+; GFX1250-NEXT: v_mov_b64_e32 v[10:11], s[10:11]
+; GFX1250-NEXT: v_mov_b64_e32 v[4:5], s[4:5]
+; GFX1250-NEXT: v_mov_b64_e32 v[6:7], s[6:7]
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX1250-NEXT: s_clause 0x1
-; GFX1250-NEXT: global_store_b128 v32, v[0:3], s[0:1] offset:112
-; GFX1250-NEXT: global_store_b128 v32, v[4:7], s[0:1] offset:96
-; GFX1250-NEXT: v_ashrrev_i32_e32 v29, 31, v28
+; GFX1250-NEXT: global_store_b128 v32, v[28:31], s[0:1] offset:112
+; GFX1250-NEXT: global_store_b128 v32, v[24:27], s[0:1] offset:96
+; GFX1250-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX1250-NEXT: s_clause 0x5
-; GFX1250-NEXT: global_store_b128 v32, v[8:11], s[0:1] offset:80
-; GFX1250-NEXT: global_store_b128 v32, v[12:15], s[0:1] offset:64
-; GFX1250-NEXT: global_store_b128 v32, v[16:19], s[0:1] offset:48
-; GFX1250-NEXT: global_store_b128 v32, v[20:23], s[0:1] offset:32
-; GFX1250-NEXT: global_store_b128 v32, v[24:27], s[0:1] offset:16
-; GFX1250-NEXT: global_store_b128 v32, v[28:31], s[0:1]
+; GFX1250-NEXT: global_store_b128 v32, v[20:23], s[0:1] offset:80
+; GFX1250-NEXT: global_store_b128 v32, v[16:19], s[0:1] offset:64
+; GFX1250-NEXT: global_store_b128 v32, v[12:15], s[0:1] offset:48
+; GFX1250-NEXT: global_store_b128 v32, v[8:11], s[0:1] offset:32
+; GFX1250-NEXT: global_store_b128 v32, v[4:7], s[0:1] offset:16
+; GFX1250-NEXT: global_store_b128 v32, v[0:3], s[0:1]
; GFX1250-NEXT: s_endpgm
%load = load <16 x i1>, ptr addrspace(4) %in
%ext = sext <16 x i1> %load to <16 x i64>
@@ -7727,11 +7712,11 @@ define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:192
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v2, s3
-; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10014
-; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10015
+; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10015
+; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10014
; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:176
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v2, s4
+; GFX1250-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v2, s3
; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10013
; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10012
; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:160
@@ -8499,87 +8484,87 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: s_lshr_b32 s34, s2, 30
-; GFX1250-NEXT: s_lshr_b32 s36, s2, 31
-; GFX1250-NEXT: s_lshr_b32 s38, s2, 28
-; GFX1250-NEXT: s_lshr_b32 s40, s2, 29
-; GFX1250-NEXT: s_lshr_b32 s42, s2, 26
-; GFX1250-NEXT: s_lshr_b32 s44, s2, 27
-; GFX1250-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
-; GFX1250-NEXT: s_lshr_b32 s46, s2, 24
-; GFX1250-NEXT: s_lshr_b32 s48, s2, 25
-; GFX1250-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v0, s34
-; GFX1250-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v1, s35 :: v_dual_mov_b32 v2, s36
-; GFX1250-NEXT: v_dual_mov_b32 v3, s37 :: v_dual_mov_b32 v4, s38
-; GFX1250-NEXT: s_lshr_b32 s26, s2, 22
+; GFX1250-NEXT: s_lshr_b32 s64, s2, 30
+; GFX1250-NEXT: s_lshr_b32 s66, s2, 31
+; GFX1250-NEXT: s_lshr_b32 s60, s2, 28
+; GFX1250-NEXT: s_lshr_b32 s62, s2, 29
+; GFX1250-NEXT: s_lshr_b32 s56, s2, 26
+; GFX1250-NEXT: s_lshr_b32 s58, s2, 27
+; GFX1250-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
+; GFX1250-NEXT: s_lshr_b32 s52, s2, 24
+; GFX1250-NEXT: s_lshr_b32 s54, s2, 25
+; GFX1250-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v0, s64
+; GFX1250-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v1, s65 :: v_dual_mov_b32 v2, s66
+; GFX1250-NEXT: v_dual_mov_b32 v3, s67 :: v_dual_mov_b32 v4, s60
+; GFX1250-NEXT: s_lshr_b32 s48, s2, 22
; GFX1250-NEXT: s_lshr_b32 s50, s2, 23
-; GFX1250-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v5, s39 :: v_dual_mov_b32 v6, s40
-; GFX1250-NEXT: v_dual_mov_b32 v7, s41 :: v_dual_mov_b32 v8, s42
-; GFX1250-NEXT: s_lshr_b32 s52, s2, 20
-; GFX1250-NEXT: s_lshr_b32 s54, s2, 21
-; GFX1250-NEXT: v_dual_mov_b32 v9, s43 :: v_dual_mov_b32 v10, s44
-; GFX1250-NEXT: v_dual_mov_b32 v11, s45 :: v_dual_mov_b32 v12, s46
-; GFX1250-NEXT: s_lshr_b32 s56, s2, 18
-; GFX1250-NEXT: s_lshr_b32 s58, s2, 19
-; GFX1250-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v13, s47 :: v_dual_mov_b32 v14, s48
-; GFX1250-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
-; GFX1250-NEXT: v_mov_b32_e32 v15, s49
-; GFX1250-NEXT: s_lshr_b32 s60, s2, 16
-; GFX1250-NEXT: s_lshr_b32 s62, s2, 17
; GFX1250-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000
-; GFX1250-NEXT: s_lshr_b32 s64, s2, 14
-; GFX1250-NEXT: s_lshr_b32 s66, s2, 15
-; GFX1250-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v5, s61 :: v_dual_mov_b32 v6, s62
+; GFX1250-NEXT: v_dual_mov_b32 v7, s63 :: v_dual_mov_b32 v8, s56
+; GFX1250-NEXT: s_lshr_b32 s44, s2, 20
+; GFX1250-NEXT: s_lshr_b32 s46, s2, 21
+; GFX1250-NEXT: v_dual_mov_b32 v9, s57 :: v_dual_mov_b32 v10, s58
+; GFX1250-NEXT: v_dual_mov_b32 v11, s59 :: v_dual_mov_b32 v12, s52
+; GFX1250-NEXT: s_lshr_b32 s40, s2, 18
+; GFX1250-NEXT: s_lshr_b32 s42, s2, 19
+; GFX1250-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v13, s53 :: v_dual_mov_b32 v14, s54
+; GFX1250-NEXT: v_mov_b32_e32 v15, s55
+; GFX1250-NEXT: s_lshr_b32 s24, s2, 16
+; GFX1250-NEXT: s_lshr_b32 s38, s2, 17
+; GFX1250-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
+; GFX1250-NEXT: s_lshr_b32 s36, s2, 14
+; GFX1250-NEXT: s_lshr_b32 s34, s2, 15
+; GFX1250-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
; GFX1250-NEXT: s_clause 0x3
; GFX1250-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:240
; GFX1250-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:224
; GFX1250-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:208
; GFX1250-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:192
; GFX1250-NEXT: s_wait_xcnt 0x3
-; GFX1250-NEXT: v_dual_mov_b32 v0, s26 :: v_dual_mov_b32 v1, s27
+; GFX1250-NEXT: v_dual_mov_b32 v0, s48 :: v_dual_mov_b32 v1, s49
; GFX1250-NEXT: v_dual_mov_b32 v2, s50 :: v_dual_mov_b32 v3, s51
; GFX1250-NEXT: s_wait_xcnt 0x2
-; GFX1250-NEXT: v_mov_b32_e32 v4, s52
+; GFX1250-NEXT: v_mov_b32_e32 v4, s44
+; GFX1250-NEXT: s_lshr_b32 s26, s2, 10
+; GFX1250-NEXT: s_lshr_b32 s22, s2, 11
; GFX1250-NEXT: s_lshr_b32 s30, s2, 12
; GFX1250-NEXT: s_lshr_b32 s28, s2, 13
-; GFX1250-NEXT: s_lshr_b32 s24, s2, 10
-; GFX1250-NEXT: s_lshr_b32 s22, s2, 11
-; GFX1250-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v5, s53 :: v_dual_mov_b32 v6, s54
+; GFX1250-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v5, s45 :: v_dual_mov_b32 v6, s46
; GFX1250-NEXT: s_wait_xcnt 0x1
-; GFX1250-NEXT: v_dual_mov_b32 v7, s55 :: v_dual_mov_b32 v8, s56
+; GFX1250-NEXT: v_dual_mov_b32 v7, s47 :: v_dual_mov_b32 v8, s40
; GFX1250-NEXT: s_lshr_b32 s20, s2, 8
; GFX1250-NEXT: s_lshr_b32 s18, s2, 9
-; GFX1250-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v9, s57 :: v_dual_mov_b32 v10, s58
+; GFX1250-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v9, s41 :: v_dual_mov_b32 v10, s42
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v11, s59 :: v_dual_mov_b32 v12, s60
+; GFX1250-NEXT: v_dual_mov_b32 v11, s43 :: v_dual_mov_b32 v12, s24
; GFX1250-NEXT: s_lshr_b32 s16, s2, 6
; GFX1250-NEXT: s_lshr_b32 s14, s2, 7
-; GFX1250-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v13, s61 :: v_dual_mov_b32 v14, s62
-; GFX1250-NEXT: v_dual_mov_b32 v15, s63 :: v_dual_mov_b32 v16, s64
+; GFX1250-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v13, s25 :: v_dual_mov_b32 v14, s38
+; GFX1250-NEXT: v_dual_mov_b32 v15, s39 :: v_dual_mov_b32 v16, s36
; GFX1250-NEXT: s_lshr_b32 s12, s2, 4
; GFX1250-NEXT: s_lshr_b32 s10, s2, 5
; GFX1250-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v17, s65 :: v_dual_mov_b32 v18, s66
-; GFX1250-NEXT: v_dual_mov_b32 v19, s67 :: v_dual_mov_b32 v20, s30
+; GFX1250-NEXT: v_dual_mov_b32 v17, s37 :: v_dual_mov_b32 v18, s34
+; GFX1250-NEXT: v_dual_mov_b32 v19, s35 :: v_dual_mov_b32 v20, s30
; GFX1250-NEXT: s_lshr_b32 s8, s2, 2
; GFX1250-NEXT: s_lshr_b32 s6, s2, 3
; GFX1250-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
@@ -8594,11 +8579,11 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX1250-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:112
; GFX1250-NEXT: global_store_b128 v24, v[20:23], s[0:1] offset:96
; GFX1250-NEXT: s_wait_xcnt 0x5
-; GFX1250-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
+; GFX1250-NEXT: v_dual_mov_b32 v0, s26 :: v_dual_mov_b32 v1, s27
; GFX1250-NEXT: v_dual_mov_b32 v2, s22 :: v_dual_mov_b32 v3, s23
; GFX1250-NEXT: s_wait_xcnt 0x4
; GFX1250-NEXT: v_mov_b32_e32 v4, s20
-; GFX1250-NEXT: s_lshr_b32 s68, s2, 1
+; GFX1250-NEXT: s_lshr_b32 s4, s2, 1
; GFX1250-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
; GFX1250-NEXT: v_dual_mov_b32 v5, s21 :: v_dual_mov_b32 v6, s18
@@ -8609,16 +8594,16 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o
; GFX1250-NEXT: v_dual_mov_b32 v9, s17 :: v_dual_mov_b32 v10, s14
; GFX1250-NEXT: s_wait_xcnt 0x2
; GFX1250-NEXT: v_dual_mov_b32 v11, s15 :: v_dual_mov_b32 v12, s12
-; GFX1250-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[2:3], s[68:69], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
; GFX1250-NEXT: v_dual_mov_b32 v13, s13 :: v_dual_mov_b32 v14, s10
; GFX1250-NEXT: s_wait_xcnt 0x1
; GFX1250-NEXT: v_dual_mov_b32 v15, s11 :: v_dual_mov_b32 v16, s8
; GFX1250-NEXT: v_dual_mov_b32 v17, s9 :: v_dual_mov_b32 v18, s6
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v19, s7 :: v_dual_mov_b32 v20, s4
-; GFX1250-NEXT: v_dual_mov_b32 v21, s5 :: v_dual_mov_b32 v22, s2
-; GFX1250-NEXT: v_mov_b32_e32 v23, s3
+; GFX1250-NEXT: v_dual_mov_b32 v19, s7 :: v_dual_mov_b32 v20, s2
+; GFX1250-NEXT: v_dual_mov_b32 v21, s3 :: v_dual_mov_b32 v22, s4
+; GFX1250-NEXT: v_mov_b32_e32 v23, s5
; GFX1250-NEXT: s_clause 0x5
; GFX1250-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:80
; GFX1250-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:64
@@ -9661,20 +9646,16 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x10014
+; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x1001e
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s4
-; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x10015
-; GFX1250-NEXT: s_lshr_b32 s4, s3, 31
-; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v3, v1
-; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x1001e
-; GFX1250-NEXT: s_bfe_u32 s6, s2, 0x10004
-; GFX1250-NEXT: s_and_b32 s7, s2, 1
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:416
-; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
+; GFX1250-NEXT: s_lshr_b32 s5, s3, 31
; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x1001d
+; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v3, v1
; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x1001c
+; GFX1250-NEXT: s_bfe_u32 s6, s2, 0x10004
+; GFX1250-NEXT: s_bfe_u32 s7, s2, 0x10002
+; GFX1250-NEXT: s_and_b32 s8, s2, 1
; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:496
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
@@ -9693,9 +9674,14 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:448
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
+; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x10015
+; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x10014
+; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:432
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x10013
; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x10012
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:432
+; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:416
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x10011
@@ -9763,11 +9749,11 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:192
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
-; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10014
-; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x10015
+; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10015
+; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x10014
; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:176
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v2, s5
+; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10013
; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x10012
; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:160
@@ -9813,16 +9799,15 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX1250-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v2, s3
; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10003
; GFX1250-NEXT: s_bfe_u32 s6, s2, 0x10001
-; GFX1250-NEXT: s_bfe_u32 s2, s2, 0x10002
; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:32
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v2, s3
+; GFX1250-NEXT: v_dual_mov_b32 v0, s7 :: v_dual_mov_b32 v2, s3
; GFX1250-NEXT: s_and_b64 s[2:3], s[4:5], 1
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s7 :: v_dual_mov_b32 v2, s6
+; GFX1250-NEXT: v_dual_mov_b32 v0, s8 :: v_dual_mov_b32 v2, s6
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_store_b128 v1, v[4:7], s[0:1] offset:256
; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1]
@@ -11207,266 +11192,284 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX1250-LABEL: constant_sextload_v64i1_to_v64i64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: ; implicit-def: $vgpr13 : SGPR spill to VGPR lane
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b64 s[10:11], s[2:3], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_lshr_b32 s2, s11, 1
+; GFX1250-NEXT: s_lshr_b32 s40, s11, 18
+; GFX1250-NEXT: v_writelane_b32 v13, s2, 0
+; GFX1250-NEXT: s_lshr_b32 s44, s11, 19
+; GFX1250-NEXT: s_lshr_b32 s48, s11, 20
+; GFX1250-NEXT: s_lshr_b32 s58, s11, 22
+; GFX1250-NEXT: s_lshr_b32 s62, s11, 23
+; GFX1250-NEXT: v_writelane_b32 v13, s3, 1
+; GFX1250-NEXT: s_lshr_b32 s2, s11, 2
+; GFX1250-NEXT: s_lshr_b32 s70, s11, 25
; GFX1250-NEXT: s_lshr_b32 s96, s11, 30
; GFX1250-NEXT: s_lshr_b32 s98, s11, 31
-; GFX1250-NEXT: s_lshr_b32 s92, s11, 28
-; GFX1250-NEXT: s_lshr_b32 s94, s11, 29
-; GFX1250-NEXT: s_lshr_b32 s78, s11, 26
-; GFX1250-NEXT: s_lshr_b32 s88, s11, 27
+; GFX1250-NEXT: v_writelane_b32 v13, s2, 2
+; GFX1250-NEXT: s_lshr_b32 s54, s11, 21
+; GFX1250-NEXT: s_lshr_b32 s86, s11, 28
+; GFX1250-NEXT: s_lshr_b32 s92, s11, 29
+; GFX1250-NEXT: s_lshr_b32 s66, s11, 24
+; GFX1250-NEXT: v_writelane_b32 v13, s3, 3
+; GFX1250-NEXT: s_lshr_b32 s2, s11, 3
+; GFX1250-NEXT: s_lshr_b32 s74, s11, 26
+; GFX1250-NEXT: s_lshr_b32 s82, s11, 27
; GFX1250-NEXT: s_bfe_i64 s[96:97], s[96:97], 0x10000
+; GFX1250-NEXT: v_writelane_b32 v13, s2, 4
; GFX1250-NEXT: s_bfe_i64 s[100:101], s[98:99], 0x10000
-; GFX1250-NEXT: s_lshr_b32 s66, s11, 24
-; GFX1250-NEXT: s_lshr_b32 s74, s11, 25
+; GFX1250-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
+; GFX1250-NEXT: v_writelane_b32 v13, s3, 5
+; GFX1250-NEXT: s_lshr_b32 s2, s11, 4
+; GFX1250-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
+; GFX1250-NEXT: v_writelane_b32 v13, s2, 6
+; GFX1250-NEXT: s_bfe_i64 s[86:87], s[86:87], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[92:93], s[92:93], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[94:95], s[94:95], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v0, s96
-; GFX1250-NEXT: s_lshr_b32 s56, s11, 22
-; GFX1250-NEXT: s_lshr_b32 s62, s11, 23
+; GFX1250-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v12, 0 :: v_dual_mov_b32 v0, s96
+; GFX1250-NEXT: v_writelane_b32 v13, s3, 7
+; GFX1250-NEXT: s_lshr_b32 s24, s11, 16
+; GFX1250-NEXT: s_lshr_b32 s34, s11, 13
+; GFX1250-NEXT: s_lshr_b32 s36, s11, 14
+; GFX1250-NEXT: s_lshr_b32 s38, s11, 15
; GFX1250-NEXT: v_dual_mov_b32 v1, s97 :: v_dual_mov_b32 v2, s100
-; GFX1250-NEXT: v_dual_mov_b32 v3, s101 :: v_dual_mov_b32 v4, s92
-; GFX1250-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[88:89], s[88:89], 0x10000
-; GFX1250-NEXT: s_lshr_b32 s44, s11, 20
-; GFX1250-NEXT: s_lshr_b32 s52, s11, 21
-; GFX1250-NEXT: s_lshr_b32 s30, s11, 18
-; GFX1250-NEXT: s_lshr_b32 s40, s11, 19
-; GFX1250-NEXT: s_lshr_b32 s18, s11, 16
-; GFX1250-NEXT: s_lshr_b32 s26, s11, 17
-; GFX1250-NEXT: s_lshr_b32 s2, s11, 14
-; GFX1250-NEXT: s_lshr_b32 s4, s11, 15
-; GFX1250-NEXT: v_dual_mov_b32 v5, s93 :: v_dual_mov_b32 v6, s94
-; GFX1250-NEXT: v_dual_mov_b32 v7, s95 :: v_dual_mov_b32 v10, s78
-; GFX1250-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v3, s101 :: v_dual_mov_b32 v4, s86
; GFX1250-NEXT: s_bfe_i64 s[74:75], s[74:75], 0x10000
-; GFX1250-NEXT: s_lshr_b32 s6, s11, 12
-; GFX1250-NEXT: s_lshr_b32 s8, s11, 13
-; GFX1250-NEXT: v_dual_mov_b32 v11, s79 :: v_dual_mov_b32 v12, s88
-; GFX1250-NEXT: v_dual_mov_b32 v13, s89 :: v_dual_mov_b32 v14, s66
-; GFX1250-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
-; GFX1250-NEXT: s_lshr_b32 s12, s11, 10
-; GFX1250-NEXT: s_lshr_b32 s14, s11, 11
-; GFX1250-NEXT: v_dual_mov_b32 v15, s67 :: v_dual_mov_b32 v16, s74
-; GFX1250-NEXT: v_dual_mov_b32 v17, s75 :: v_dual_mov_b32 v18, s56
-; GFX1250-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[82:83], s[82:83], 0x10000
+; GFX1250-NEXT: s_lshr_b32 s8, s10, 4
+; GFX1250-NEXT: s_lshr_b32 s6, s10, 5
+; GFX1250-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
+; GFX1250-NEXT: s_lshr_b32 s4, s10, 6
+; GFX1250-NEXT: v_dual_mov_b32 v17, s71 :: v_dual_mov_b32 v18, s58
+; GFX1250-NEXT: s_lshr_b32 s58, s10, 8
+; GFX1250-NEXT: v_dual_mov_b32 v21, s63 :: v_dual_mov_b32 v22, s48
+; GFX1250-NEXT: s_lshr_b32 s48, s10, 10
+; GFX1250-NEXT: v_dual_mov_b32 v27, s41 :: v_dual_mov_b32 v28, s44
+; GFX1250-NEXT: s_lshr_b32 s44, s10, 13
+; GFX1250-NEXT: s_lshr_b32 s26, s11, 17
+; GFX1250-NEXT: s_mov_b32 s42, s11
+; GFX1250-NEXT: v_dual_mov_b32 v5, s87 :: v_dual_mov_b32 v6, s92
+; GFX1250-NEXT: v_dual_mov_b32 v7, s93 :: v_dual_mov_b32 v8, s74
+; GFX1250-NEXT: s_lshr_b32 s2, s10, 7
+; GFX1250-NEXT: v_dual_mov_b32 v23, s49 :: v_dual_mov_b32 v24, s54
+; GFX1250-NEXT: s_lshr_b32 s54, s10, 11
+; GFX1250-NEXT: s_lshr_b32 s30, s11, 12
+; GFX1250-NEXT: v_dual_mov_b32 v9, s75 :: v_dual_mov_b32 v10, s82
+; GFX1250-NEXT: v_dual_mov_b32 v11, s83 :: v_dual_mov_b32 v14, s66
+; GFX1250-NEXT: v_dual_mov_b32 v15, s67 :: v_dual_mov_b32 v16, s70
+; GFX1250-NEXT: v_dual_mov_b32 v19, s59 :: v_dual_mov_b32 v20, s62
+; GFX1250-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: v_dual_mov_b32 v29, s45 :: v_dual_mov_b32 v30, s24
+; GFX1250-NEXT: s_bfe_i64 s[82:83], s[44:45], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[74:75], s[48:49], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[66:67], s[58:59], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[58:59], s[4:5], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[44:45], s[6:7], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[48:49], s[8:9], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[4:5], s[38:39], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[6:7], s[36:37], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[8:9], s[34:35], 0x10000
+; GFX1250-NEXT: v_readlane_b32 s34, v13, 6
+; GFX1250-NEXT: v_readlane_b32 s36, v13, 4
+; GFX1250-NEXT: v_readlane_b32 s38, v13, 2
+; GFX1250-NEXT: s_lshr_b32 s22, s11, 10
+; GFX1250-NEXT: s_lshr_b32 s28, s11, 11
+; GFX1250-NEXT: v_dual_mov_b32 v25, s55 :: v_dual_mov_b32 v26, s40
; GFX1250-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
-; GFX1250-NEXT: s_lshr_b32 s16, s11, 8
+; GFX1250-NEXT: s_bfe_i64 s[70:71], s[54:55], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[54:55], s[2:3], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[2:3], s[42:43], 0x10000
+; GFX1250-NEXT: v_readlane_b32 s35, v13, 7
+; GFX1250-NEXT: v_readlane_b32 s37, v13, 5
+; GFX1250-NEXT: v_readlane_b32 s39, v13, 3
+; GFX1250-NEXT: v_readlane_b32 s42, v13, 0
+; GFX1250-NEXT: s_lshr_b32 s18, s11, 8
; GFX1250-NEXT: s_lshr_b32 s20, s11, 9
-; GFX1250-NEXT: v_dual_mov_b32 v19, s57 :: v_dual_mov_b32 v20, s62
-; GFX1250-NEXT: v_dual_mov_b32 v21, s63 :: v_dual_mov_b32 v22, s44
-; GFX1250-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
-; GFX1250-NEXT: s_lshr_b32 s22, s11, 6
-; GFX1250-NEXT: s_lshr_b32 s24, s11, 7
-; GFX1250-NEXT: v_dual_mov_b32 v23, s45 :: v_dual_mov_b32 v24, s52
-; GFX1250-NEXT: v_dual_mov_b32 v25, s53 :: v_dual_mov_b32 v26, s30
-; GFX1250-NEXT: v_dual_mov_b32 v27, s31 :: v_dual_mov_b32 v28, s40
-; GFX1250-NEXT: v_dual_mov_b32 v29, s41 :: v_dual_mov_b32 v30, s18
-; GFX1250-NEXT: v_dual_mov_b32 v31, s19 :: v_dual_mov_b32 v32, s26
+; GFX1250-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
+; GFX1250-NEXT: v_readlane_b32 s43, v13, 1
+; GFX1250-NEXT: s_lshr_b32 s14, s11, 6
+; GFX1250-NEXT: s_lshr_b32 s16, s11, 7
+; GFX1250-NEXT: v_dual_mov_b32 v31, s25 :: v_dual_mov_b32 v32, s26
; GFX1250-NEXT: v_mov_b32_e32 v33, s27
-; GFX1250-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
; GFX1250-NEXT: s_clause 0x7
-; GFX1250-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:496
-; GFX1250-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:480
-; GFX1250-NEXT: global_store_b128 v8, v[10:13], s[0:1] offset:464
-; GFX1250-NEXT: global_store_b128 v8, v[14:17], s[0:1] offset:448
-; GFX1250-NEXT: global_store_b128 v8, v[18:21], s[0:1] offset:432
-; GFX1250-NEXT: global_store_b128 v8, v[22:25], s[0:1] offset:416
-; GFX1250-NEXT: global_store_b128 v8, v[26:29], s[0:1] offset:400
-; GFX1250-NEXT: global_store_b128 v8, v[30:33], s[0:1] offset:384
+; GFX1250-NEXT: global_store_b128 v12, v[0:3], s[0:1] offset:496
+; GFX1250-NEXT: global_store_b128 v12, v[4:7], s[0:1] offset:480
+; GFX1250-NEXT: global_store_b128 v12, v[8:11], s[0:1] offset:464
+; GFX1250-NEXT: global_store_b128 v12, v[14:17], s[0:1] offset:448
+; GFX1250-NEXT: global_store_b128 v12, v[18:21], s[0:1] offset:432
+; GFX1250-NEXT: global_store_b128 v12, v[22:25], s[0:1] offset:416
+; GFX1250-NEXT: global_store_b128 v12, v[26:29], s[0:1] offset:400
+; GFX1250-NEXT: global_store_b128 v12, v[30:33], s[0:1] offset:384
; GFX1250-NEXT: s_wait_xcnt 0x7
-; GFX1250-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
+; GFX1250-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX1250-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
; GFX1250-NEXT: s_wait_xcnt 0x6
-; GFX1250-NEXT: v_mov_b32_e32 v4, s6
-; GFX1250-NEXT: s_lshr_b32 s28, s11, 4
-; GFX1250-NEXT: s_lshr_b32 s34, s11, 5
-; GFX1250-NEXT: s_lshr_b32 s36, s11, 2
-; GFX1250-NEXT: s_lshr_b32 s38, s11, 3
+; GFX1250-NEXT: v_mov_b32_e32 v4, s30
+; GFX1250-NEXT: s_lshr_b32 s12, s11, 5
; GFX1250-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v5, s7 :: v_dual_mov_b32 v6, s8
+; GFX1250-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v5, s31 :: v_dual_mov_b32 v6, s8
; GFX1250-NEXT: s_wait_xcnt 0x5
-; GFX1250-NEXT: v_dual_mov_b32 v7, s9 :: v_dual_mov_b32 v10, s12
-; GFX1250-NEXT: s_lshr_b32 s42, s11, 1
-; GFX1250-NEXT: s_mov_b32 s46, s11
-; GFX1250-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v11, s13 :: v_dual_mov_b32 v12, s14
+; GFX1250-NEXT: v_dual_mov_b32 v7, s9 :: v_dual_mov_b32 v8, s22
+; GFX1250-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v9, s23 :: v_dual_mov_b32 v10, s28
; GFX1250-NEXT: s_wait_xcnt 0x4
-; GFX1250-NEXT: v_dual_mov_b32 v13, s15 :: v_dual_mov_b32 v14, s16
-; GFX1250-NEXT: s_lshr_b32 s48, s10, 30
-; GFX1250-NEXT: s_lshr_b32 s50, s10, 31
-; GFX1250-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v11, s29 :: v_dual_mov_b32 v14, s18
+; GFX1250-NEXT: s_lshr_b32 s94, s10, 30
+; GFX1250-NEXT: s_lshr_b32 s98, s10, 31
+; GFX1250-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v15, s17 :: v_dual_mov_b32 v16, s20
+; GFX1250-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v15, s19 :: v_dual_mov_b32 v16, s20
; GFX1250-NEXT: s_wait_xcnt 0x3
-; GFX1250-NEXT: v_dual_mov_b32 v17, s21 :: v_dual_mov_b32 v18, s22
-; GFX1250-NEXT: s_lshr_b32 s54, s10, 28
-; GFX1250-NEXT: s_lshr_b32 s58, s10, 29
-; GFX1250-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v17, s21 :: v_dual_mov_b32 v18, s14
+; GFX1250-NEXT: s_lshr_b32 s88, s10, 28
+; GFX1250-NEXT: s_lshr_b32 s90, s10, 29
; GFX1250-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v19, s23 :: v_dual_mov_b32 v20, s24
+; GFX1250-NEXT: v_dual_mov_b32 v19, s15 :: v_dual_mov_b32 v20, s16
; GFX1250-NEXT: s_wait_xcnt 0x2
-; GFX1250-NEXT: v_dual_mov_b32 v21, s25 :: v_dual_mov_b32 v22, s28
-; GFX1250-NEXT: s_lshr_b32 s60, s10, 26
-; GFX1250-NEXT: s_lshr_b32 s64, s10, 27
-; GFX1250-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v23, s29 :: v_dual_mov_b32 v24, s34
-; GFX1250-NEXT: v_mov_b32_e32 v25, s35
+; GFX1250-NEXT: v_dual_mov_b32 v21, s17 :: v_dual_mov_b32 v22, s34
+; GFX1250-NEXT: s_lshr_b32 s80, s10, 26
+; GFX1250-NEXT: s_lshr_b32 s84, s10, 27
+; GFX1250-NEXT: s_bfe_i64 s[98:99], s[98:99], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[94:95], s[94:95], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v23, s35 :: v_dual_mov_b32 v24, s12
+; GFX1250-NEXT: v_mov_b32_e32 v25, s13
; GFX1250-NEXT: s_clause 0x5
-; GFX1250-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:368
-; GFX1250-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:352
-; GFX1250-NEXT: global_store_b128 v8, v[10:13], s[0:1] offset:336
-; GFX1250-NEXT: global_store_b128 v8, v[14:17], s[0:1] offset:320
-; GFX1250-NEXT: global_store_b128 v8, v[18:21], s[0:1] offset:304
-; GFX1250-NEXT: global_store_b128 v8, v[22:25], s[0:1] offset:288
+; GFX1250-NEXT: global_store_b128 v12, v[0:3], s[0:1] offset:368
+; GFX1250-NEXT: global_store_b128 v12, v[4:7], s[0:1] offset:352
+; GFX1250-NEXT: global_store_b128 v12, v[8:11], s[0:1] offset:336
+; GFX1250-NEXT: global_store_b128 v12, v[14:17], s[0:1] offset:320
+; GFX1250-NEXT: global_store_b128 v12, v[18:21], s[0:1] offset:304
+; GFX1250-NEXT: global_store_b128 v12, v[22:25], s[0:1] offset:288
; GFX1250-NEXT: s_wait_xcnt 0x5
-; GFX1250-NEXT: v_dual_mov_b32 v0, s36 :: v_dual_mov_b32 v1, s37
-; GFX1250-NEXT: v_dual_mov_b32 v2, s38 :: v_dual_mov_b32 v3, s39
+; GFX1250-NEXT: v_dual_mov_b32 v0, s38 :: v_dual_mov_b32 v1, s39
+; GFX1250-NEXT: v_dual_mov_b32 v2, s36 :: v_dual_mov_b32 v3, s37
; GFX1250-NEXT: s_wait_xcnt 0x4
-; GFX1250-NEXT: v_mov_b32_e32 v4, s46
-; GFX1250-NEXT: s_lshr_b32 s68, s10, 24
-; GFX1250-NEXT: s_lshr_b32 s70, s10, 25
-; GFX1250-NEXT: s_lshr_b32 s72, s10, 22
-; GFX1250-NEXT: s_lshr_b32 s76, s10, 23
-; GFX1250-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v5, s47 :: v_dual_mov_b32 v6, s42
+; GFX1250-NEXT: v_mov_b32_e32 v4, s2
+; GFX1250-NEXT: s_lshr_b32 s68, s10, 22
+; GFX1250-NEXT: s_lshr_b32 s72, s10, 23
+; GFX1250-NEXT: s_lshr_b32 s76, s10, 24
+; GFX1250-NEXT: s_lshr_b32 s78, s10, 25
+; GFX1250-NEXT: s_bfe_i64 s[90:91], s[90:91], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[88:89], s[88:89], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v6, s42
; GFX1250-NEXT: s_wait_xcnt 0x3
-; GFX1250-NEXT: v_dual_mov_b32 v7, s43 :: v_dual_mov_b32 v10, s48
-; GFX1250-NEXT: s_lshr_b32 s80, s10, 20
-; GFX1250-NEXT: s_lshr_b32 s82, s10, 21
-; GFX1250-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v11, s49 :: v_dual_mov_b32 v12, s50
+; GFX1250-NEXT: v_dual_mov_b32 v7, s43 :: v_dual_mov_b32 v8, s94
+; GFX1250-NEXT: s_lshr_b32 s60, s10, 20
+; GFX1250-NEXT: s_lshr_b32 s64, s10, 21
+; GFX1250-NEXT: s_bfe_i64 s[84:85], s[84:85], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[80:81], s[80:81], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v9, s95 :: v_dual_mov_b32 v10, s98
; GFX1250-NEXT: s_wait_xcnt 0x2
-; GFX1250-NEXT: v_dual_mov_b32 v13, s51 :: v_dual_mov_b32 v14, s54
-; GFX1250-NEXT: s_lshr_b32 s84, s10, 18
-; GFX1250-NEXT: s_lshr_b32 s86, s10, 19
+; GFX1250-NEXT: v_dual_mov_b32 v11, s99 :: v_dual_mov_b32 v14, s88
+; GFX1250-NEXT: s_lshr_b32 s52, s10, 18
+; GFX1250-NEXT: s_lshr_b32 s56, s10, 19
+; GFX1250-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[76:77], s[76:77], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[72:73], s[72:73], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000
; GFX1250-NEXT: s_bfe_i64 s[68:69], s[68:69], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v15, s55 :: v_dual_mov_b32 v16, s58
+; GFX1250-NEXT: v_dual_mov_b32 v15, s89 :: v_dual_mov_b32 v16, s90
; GFX1250-NEXT: s_wait_xcnt 0x1
-; GFX1250-NEXT: v_dual_mov_b32 v17, s59 :: v_dual_mov_b32 v18, s60
-; GFX1250-NEXT: s_lshr_b32 s90, s10, 16
-; GFX1250-NEXT: s_lshr_b32 s98, s10, 17
-; GFX1250-NEXT: s_bfe_i64 s[82:83], s[82:83], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[80:81], s[80:81], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v19, s61 :: v_dual_mov_b32 v20, s64
+; GFX1250-NEXT: v_dual_mov_b32 v17, s91 :: v_dual_mov_b32 v18, s80
+; GFX1250-NEXT: s_lshr_b32 s46, s10, 16
+; GFX1250-NEXT: s_lshr_b32 s50, s10, 17
+; GFX1250-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v19, s81 :: v_dual_mov_b32 v20, s84
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v21, s65 :: v_dual_mov_b32 v22, s68
-; GFX1250-NEXT: s_lshr_b32 s96, s10, 14
-; GFX1250-NEXT: s_lshr_b32 s100, s10, 15
-; GFX1250-NEXT: s_lshr_b32 s94, s10, 13
-; GFX1250-NEXT: s_lshr_b32 s88, s10, 11
-; GFX1250-NEXT: s_lshr_b32 s74, s10, 9
-; GFX1250-NEXT: s_lshr_b32 s62, s10, 7
-; GFX1250-NEXT: s_lshr_b32 s52, s10, 5
-; GFX1250-NEXT: s_lshr_b32 s40, s10, 3
-; GFX1250-NEXT: s_lshr_b32 s26, s10, 1
-; GFX1250-NEXT: s_bfe_i64 s[86:87], s[86:87], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[84:85], s[84:85], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v23, s69 :: v_dual_mov_b32 v24, s70
-; GFX1250-NEXT: v_mov_b32_e32 v25, s71
+; GFX1250-NEXT: v_dual_mov_b32 v21, s85 :: v_dual_mov_b32 v22, s76
+; GFX1250-NEXT: s_lshr_b32 s24, s10, 14
+; GFX1250-NEXT: s_lshr_b32 s26, s10, 15
+; GFX1250-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v23, s77 :: v_dual_mov_b32 v24, s78
+; GFX1250-NEXT: v_mov_b32_e32 v25, s79
; GFX1250-NEXT: s_clause 0x5
-; GFX1250-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:272
-; GFX1250-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:256
-; GFX1250-NEXT: global_store_b128 v8, v[10:13], s[0:1] offset:240
-; GFX1250-NEXT: global_store_b128 v8, v[14:17], s[0:1] offset:224
-; GFX1250-NEXT: global_store_b128 v8, v[18:21], s[0:1] offset:208
-; GFX1250-NEXT: global_store_b128 v8, v[22:25], s[0:1] offset:192
+; GFX1250-NEXT: global_store_b128 v12, v[0:3], s[0:1] offset:272
+; GFX1250-NEXT: global_store_b128 v12, v[4:7], s[0:1] offset:256
+; GFX1250-NEXT: global_store_b128 v12, v[8:11], s[0:1] offset:240
+; GFX1250-NEXT: global_store_b128 v12, v[14:17], s[0:1] offset:224
+; GFX1250-NEXT: global_store_b128 v12, v[18:21], s[0:1] offset:208
+; GFX1250-NEXT: global_store_b128 v12, v[22:25], s[0:1] offset:192
; GFX1250-NEXT: s_wait_xcnt 0x5
-; GFX1250-NEXT: v_dual_mov_b32 v0, s72 :: v_dual_mov_b32 v1, s73
-; GFX1250-NEXT: v_dual_mov_b32 v2, s76 :: v_dual_mov_b32 v3, s77
+; GFX1250-NEXT: v_dual_mov_b32 v0, s68 :: v_dual_mov_b32 v1, s69
+; GFX1250-NEXT: v_dual_mov_b32 v2, s72 :: v_dual_mov_b32 v3, s73
; GFX1250-NEXT: s_wait_xcnt 0x4
-; GFX1250-NEXT: v_mov_b32_e32 v4, s80
-; GFX1250-NEXT: s_lshr_b32 s92, s10, 12
-; GFX1250-NEXT: s_lshr_b32 s78, s10, 10
-; GFX1250-NEXT: s_bfe_i64 s[98:99], s[98:99], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[90:91], s[90:91], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v5, s81 :: v_dual_mov_b32 v6, s82
+; GFX1250-NEXT: v_mov_b32_e32 v4, s60
+; GFX1250-NEXT: s_lshr_b32 s40, s10, 12
+; GFX1250-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v5, s61 :: v_dual_mov_b32 v6, s64
; GFX1250-NEXT: s_wait_xcnt 0x3
-; GFX1250-NEXT: v_dual_mov_b32 v7, s83 :: v_dual_mov_b32 v10, s84
-; GFX1250-NEXT: s_lshr_b32 s66, s10, 8
-; GFX1250-NEXT: s_lshr_b32 s56, s10, 6
-; GFX1250-NEXT: s_lshr_b32 s44, s10, 4
-; GFX1250-NEXT: s_lshr_b32 s30, s10, 2
-; GFX1250-NEXT: s_bfe_i64 s[18:19], s[10:11], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[10:11], s[26:27], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[26:27], s[40:41], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[40:41], s[52:53], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[52:53], s[62:63], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[62:63], s[74:75], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[74:75], s[88:89], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[88:89], s[94:95], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[94:95], s[100:101], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[96:97], s[96:97], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v11, s85 :: v_dual_mov_b32 v12, s86
+; GFX1250-NEXT: v_dual_mov_b32 v7, s65 :: v_dual_mov_b32 v8, s52
+; GFX1250-NEXT: s_lshr_b32 s62, s10, 9
+; GFX1250-NEXT: s_bfe_i64 s[92:93], s[26:27], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[96:97], s[24:25], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v9, s53 :: v_dual_mov_b32 v10, s56
; GFX1250-NEXT: s_wait_xcnt 0x2
-; GFX1250-NEXT: v_dual_mov_b32 v13, s87 :: v_dual_mov_b32 v14, s90
-; GFX1250-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000
-; GFX1250-NEXT: s_bfe_i64 s[92:93], s[92:93], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v15, s91 :: v_dual_mov_b32 v16, s98
+; GFX1250-NEXT: v_dual_mov_b32 v11, s57 :: v_dual_mov_b32 v14, s46
+; GFX1250-NEXT: s_bfe_i64 s[86:87], s[40:41], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v15, s47 :: v_dual_mov_b32 v16, s50
; GFX1250-NEXT: s_wait_xcnt 0x1
-; GFX1250-NEXT: v_dual_mov_b32 v17, s99 :: v_dual_mov_b32 v18, s96
-; GFX1250-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v19, s97 :: v_dual_mov_b32 v20, s94
+; GFX1250-NEXT: v_dual_mov_b32 v17, s51 :: v_dual_mov_b32 v18, s96
+; GFX1250-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v19, s97 :: v_dual_mov_b32 v20, s92
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v21, s95 :: v_dual_mov_b32 v22, s92
-; GFX1250-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v23, s93 :: v_dual_mov_b32 v24, s88
-; GFX1250-NEXT: v_mov_b32_e32 v25, s89
+; GFX1250-NEXT: v_dual_mov_b32 v21, s93 :: v_dual_mov_b32 v22, s86
+; GFX1250-NEXT: s_lshr_b32 s102, s10, 2
+; GFX1250-NEXT: s_lshr_b32 vcc_lo, s10, 3
+; GFX1250-NEXT: v_dual_mov_b32 v23, s87 :: v_dual_mov_b32 v24, s82
+; GFX1250-NEXT: v_mov_b32_e32 v25, s83
; GFX1250-NEXT: s_clause 0x5
-; GFX1250-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:176
-; GFX1250-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:160
-; GFX1250-NEXT: global_store_b128 v8, v[10:13], s[0:1] offset:144
-; GFX1250-NEXT: global_store_b128 v8, v[14:17], s[0:1] offset:128
-; GFX1250-NEXT: global_store_b128 v8, v[18:21], s[0:1] offset:112
-; GFX1250-NEXT: global_store_b128 v8, v[22:25], s[0:1] offset:96
+; GFX1250-NEXT: global_store_b128 v12, v[0:3], s[0:1] offset:176
+; GFX1250-NEXT: global_store_b128 v12, v[4:7], s[0:1] offset:160
+; GFX1250-NEXT: global_store_b128 v12, v[8:11], s[0:1] offset:144
+; GFX1250-NEXT: global_store_b128 v12, v[14:17], s[0:1] offset:128
+; GFX1250-NEXT: global_store_b128 v12, v[18:21], s[0:1] offset:112
+; GFX1250-NEXT: global_store_b128 v12, v[22:25], s[0:1] offset:96
; GFX1250-NEXT: s_wait_xcnt 0x5
-; GFX1250-NEXT: v_dual_mov_b32 v0, s78 :: v_dual_mov_b32 v1, s79
-; GFX1250-NEXT: v_dual_mov_b32 v2, s74 :: v_dual_mov_b32 v3, s75
+; GFX1250-NEXT: v_dual_mov_b32 v0, s74 :: v_dual_mov_b32 v1, s75
+; GFX1250-NEXT: v_dual_mov_b32 v2, s70 :: v_dual_mov_b32 v3, s71
; GFX1250-NEXT: s_wait_xcnt 0x4
; GFX1250-NEXT: v_mov_b32_e32 v4, s66
-; GFX1250-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
+; GFX1250-NEXT: s_lshr_b32 s100, s10, 1
; GFX1250-NEXT: v_dual_mov_b32 v5, s67 :: v_dual_mov_b32 v6, s62
; GFX1250-NEXT: s_wait_xcnt 0x3
-; GFX1250-NEXT: v_dual_mov_b32 v7, s63 :: v_dual_mov_b32 v10, s56
-; GFX1250-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
-; GFX1250-NEXT: v_dual_mov_b32 v11, s57 :: v_dual_mov_b32 v12, s52
+; GFX1250-NEXT: v_dual_mov_b32 v7, s63 :: v_dual_mov_b32 v8, s58
+; GFX1250-NEXT: s_bfe_i64 s[26:27], vcc, 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[40:41], s[102:103], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v9, s59 :: v_dual_mov_b32 v10, s54
; GFX1250-NEXT: s_wait_xcnt 0x2
-; GFX1250-NEXT: v_dual_mov_b32 v13, s53 :: v_dual_mov_b32 v14, s44
-; GFX1250-NEXT: v_dual_mov_b32 v15, s45 :: v_dual_mov_b32 v16, s40
+; GFX1250-NEXT: v_dual_mov_b32 v11, s55 :: v_dual_mov_b32 v14, s48
+; GFX1250-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
+; GFX1250-NEXT: s_bfe_i64 s[24:25], s[100:101], 0x10000
+; GFX1250-NEXT: v_dual_mov_b32 v15, s49 :: v_dual_mov_b32 v16, s44
; GFX1250-NEXT: s_wait_xcnt 0x1
-; GFX1250-NEXT: v_dual_mov_b32 v17, s41 :: v_dual_mov_b32 v18, s30
-; GFX1250-NEXT: v_dual_mov_b32 v19, s31 :: v_dual_mov_b32 v20, s26
+; GFX1250-NEXT: v_dual_mov_b32 v17, s45 :: v_dual_mov_b32 v18, s40
+; GFX1250-NEXT: v_dual_mov_b32 v19, s41 :: v_dual_mov_b32 v20, s26
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v21, s27 :: v_dual_mov_b32 v22, s18
-; GFX1250-NEXT: v_dual_mov_b32 v23, s19 :: v_dual_mov_b32 v24, s10
-; GFX1250-NEXT: v_mov_b32_e32 v25, s11
+; GFX1250-NEXT: v_dual_mov_b32 v21, s27 :: v_dual_mov_b32 v22, s10
+; GFX1250-NEXT: v_dual_mov_b32 v23, s11 :: v_dual_mov_b32 v24, s24
+; GFX1250-NEXT: v_mov_b32_e32 v25, s25
; GFX1250-NEXT: s_clause 0x5
-; GFX1250-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:80
-; GFX1250-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:64
-; GFX1250-NEXT: global_store_b128 v8, v[10:13], s[0:1] offset:48
-; GFX1250-NEXT: global_store_b128 v8, v[14:17], s[0:1] offset:32
-; GFX1250-NEXT: global_store_b128 v8, v[18:21], s[0:1] offset:16
-; GFX1250-NEXT: global_store_b128 v8, v[22:25], s[0:1]
+; GFX1250-NEXT: global_store_b128 v12, v[0:3], s[0:1] offset:80
+; GFX1250-NEXT: global_store_b128 v12, v[4:7], s[0:1] offset:64
+; GFX1250-NEXT: global_store_b128 v12, v[8:11], s[0:1] offset:48
+; GFX1250-NEXT: global_store_b128 v12, v[14:17], s[0:1] offset:32
+; GFX1250-NEXT: global_store_b128 v12, v[18:21], s[0:1] offset:16
+; GFX1250-NEXT: global_store_b128 v12, v[22:25], s[0:1]
; GFX1250-NEXT: s_endpgm
%load = load <64 x i1>, ptr addrspace(4) %in
%ext = sext <64 x i1> %load to <64 x i64>
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i32.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i32.ll
index 6f7ee70812264..2e1abfc33115c 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i32.ll
@@ -2193,15 +2193,15 @@ define amdgpu_kernel void @constant_sextload_v2i32_to_v2i64(ptr addrspace(1) %ou
; GFX1250-LABEL: constant_sextload_v2i32_to_v2i64:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v4, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v0, s2
-; GFX1250-NEXT: s_ashr_i32 s4, s3, 31
-; GFX1250-NEXT: s_ashr_i32 s5, s2, 31
-; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1250-NEXT: v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v1, s5
-; GFX1250-NEXT: v_mov_b32_e32 v3, s4
+; GFX1250-NEXT: s_ashr_i32 s5, s3, 31
+; GFX1250-NEXT: s_mov_b32 s4, s3
+; GFX1250-NEXT: s_ashr_i32 s3, s2, 31
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], s[4:5]
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
; GFX1250-NEXT: global_store_b128 v4, v[0:3], s[0:1]
; GFX1250-NEXT: s_endpgm
%ld = load <2 x i32>, ptr addrspace(4) %in
@@ -2520,14 +2520,15 @@ define amdgpu_kernel void @constant_sextload_v4i32_to_v4i64(ptr addrspace(1) %ou
; GFX1250-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v0, s6
-; GFX1250-NEXT: s_ashr_i32 s8, s7, 31
-; GFX1250-NEXT: s_ashr_i32 s9, s6, 31
-; GFX1250-NEXT: s_ashr_i32 s2, s5, 31
-; GFX1250-NEXT: s_ashr_i32 s3, s4, 31
-; GFX1250-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v4, s4
-; GFX1250-NEXT: v_dual_mov_b32 v6, s5 :: v_dual_mov_b32 v1, s9
-; GFX1250-NEXT: v_dual_mov_b32 v3, s8 :: v_dual_mov_b32 v5, s3
-; GFX1250-NEXT: v_mov_b32_e32 v7, s2
+; GFX1250-NEXT: s_ashr_i32 s2, s7, 31
+; GFX1250-NEXT: s_mov_b32 s3, s7
+; GFX1250-NEXT: s_ashr_i32 s7, s6, 31
+; GFX1250-NEXT: s_ashr_i32 s8, s5, 31
+; GFX1250-NEXT: s_ashr_i32 s9, s4, 31
+; GFX1250-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v1, s7
+; GFX1250-NEXT: v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v3, s2
+; GFX1250-NEXT: v_dual_mov_b32 v5, s9 :: v_dual_mov_b32 v6, s5
+; GFX1250-NEXT: v_mov_b32_e32 v7, s8
; GFX1250-NEXT: s_clause 0x1
; GFX1250-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16
; GFX1250-NEXT: global_store_b128 v8, v[4:7], s[0:1]
@@ -3025,32 +3026,32 @@ define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(ptr addrspace(1) %ou
;
; GFX1250-LABEL: constant_sextload_v8i32_to_v8i64:
; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_load_b128 s[8:11], s[4:5], 0x24
; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: s_load_b256 s[4:11], s[2:3], 0x0
+; GFX1250-NEXT: s_load_b256 s[0:7], s[10:11], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v0, s10
-; GFX1250-NEXT: s_ashr_i32 s16, s11, 31
-; GFX1250-NEXT: s_ashr_i32 s17, s10, 31
-; GFX1250-NEXT: s_ashr_i32 s14, s9, 31
-; GFX1250-NEXT: s_ashr_i32 s15, s8, 31
-; GFX1250-NEXT: s_ashr_i32 s12, s7, 31
-; GFX1250-NEXT: s_ashr_i32 s13, s6, 31
-; GFX1250-NEXT: v_dual_mov_b32 v2, s11 :: v_dual_mov_b32 v4, s8
-; GFX1250-NEXT: v_dual_mov_b32 v14, s5 :: v_dual_mov_b32 v1, s17
-; GFX1250-NEXT: v_dual_mov_b32 v3, s16 :: v_dual_mov_b32 v5, s15
-; GFX1250-NEXT: s_ashr_i32 s2, s5, 31
-; GFX1250-NEXT: s_ashr_i32 s3, s4, 31
-; GFX1250-NEXT: v_dual_mov_b32 v6, s9 :: v_dual_mov_b32 v8, s6
-; GFX1250-NEXT: v_dual_mov_b32 v7, s14 :: v_dual_mov_b32 v9, s13
-; GFX1250-NEXT: v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v12, s4
-; GFX1250-NEXT: v_dual_mov_b32 v11, s12 :: v_dual_mov_b32 v13, s3
-; GFX1250-NEXT: v_mov_b32_e32 v15, s2
+; GFX1250-NEXT: v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v0, s6
+; GFX1250-NEXT: s_ashr_i32 s10, s7, 31
+; GFX1250-NEXT: s_ashr_i32 s11, s6, 31
+; GFX1250-NEXT: s_ashr_i32 s12, s5, 31
+; GFX1250-NEXT: s_ashr_i32 s13, s4, 31
+; GFX1250-NEXT: s_ashr_i32 s14, s3, 31
+; GFX1250-NEXT: s_ashr_i32 s15, s2, 31
+; GFX1250-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v1, s11
+; GFX1250-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s10
+; GFX1250-NEXT: s_ashr_i32 s16, s1, 31
+; GFX1250-NEXT: s_ashr_i32 s17, s0, 31
+; GFX1250-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v8, s2
+; GFX1250-NEXT: v_dual_mov_b32 v5, s13 :: v_dual_mov_b32 v6, s5
+; GFX1250-NEXT: v_dual_mov_b32 v7, s12 :: v_dual_mov_b32 v9, s15
+; GFX1250-NEXT: v_dual_mov_b32 v10, s3 :: v_dual_mov_b32 v11, s14
+; GFX1250-NEXT: v_dual_mov_b32 v13, s17 :: v_dual_mov_b32 v14, s1
+; GFX1250-NEXT: v_mov_b32_e32 v15, s16
; GFX1250-NEXT: s_clause 0x3
-; GFX1250-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48
-; GFX1250-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:32
-; GFX1250-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16
-; GFX1250-NEXT: global_store_b128 v16, v[12:15], s[0:1]
+; GFX1250-NEXT: global_store_b128 v16, v[0:3], s[8:9] offset:48
+; GFX1250-NEXT: global_store_b128 v16, v[4:7], s[8:9] offset:32
+; GFX1250-NEXT: global_store_b128 v16, v[8:11], s[8:9] offset:16
+; GFX1250-NEXT: global_store_b128 v16, v[12:15], s[8:9]
; GFX1250-NEXT: s_endpgm
%ld = load <8 x i32>, ptr addrspace(4) %in
%ext = sext <8 x i32> %ld to <8 x i64>
@@ -3536,50 +3537,51 @@ define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(ptr addrspace(1) %
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b512 s[0:15], s[18:19], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v28, 0 :: v_dual_mov_b32 v0, s14
-; GFX1250-NEXT: s_ashr_i32 s28, s11, 31
-; GFX1250-NEXT: s_ashr_i32 s29, s10, 31
-; GFX1250-NEXT: s_ashr_i32 s30, s13, 31
-; GFX1250-NEXT: s_ashr_i32 s33, s15, 31
-; GFX1250-NEXT: s_ashr_i32 s34, s14, 31
-; GFX1250-NEXT: s_ashr_i32 s26, s9, 31
-; GFX1250-NEXT: s_ashr_i32 s27, s8, 31
-; GFX1250-NEXT: s_ashr_i32 s31, s12, 31
-; GFX1250-NEXT: s_ashr_i32 s24, s7, 31
-; GFX1250-NEXT: s_ashr_i32 s25, s6, 31
-; GFX1250-NEXT: v_dual_mov_b32 v2, s15 :: v_dual_mov_b32 v4, s12
-; GFX1250-NEXT: v_dual_mov_b32 v6, s13 :: v_dual_mov_b32 v8, s10
-; GFX1250-NEXT: v_dual_mov_b32 v10, s11 :: v_dual_mov_b32 v12, s8
-; GFX1250-NEXT: v_dual_mov_b32 v26, s3 :: v_dual_mov_b32 v1, s34
-; GFX1250-NEXT: v_dual_mov_b32 v3, s33 :: v_dual_mov_b32 v5, s31
-; GFX1250-NEXT: v_dual_mov_b32 v7, s30 :: v_dual_mov_b32 v9, s29
-; GFX1250-NEXT: v_dual_mov_b32 v11, s28 :: v_dual_mov_b32 v13, s27
-; GFX1250-NEXT: s_ashr_i32 s22, s5, 31
-; GFX1250-NEXT: s_ashr_i32 s23, s4, 31
-; GFX1250-NEXT: v_dual_mov_b32 v14, s9 :: v_dual_mov_b32 v16, s6
-; GFX1250-NEXT: v_dual_mov_b32 v15, s26 :: v_dual_mov_b32 v17, s25
-; GFX1250-NEXT: s_ashr_i32 s20, s3, 31
-; GFX1250-NEXT: s_ashr_i32 s21, s2, 31
-; GFX1250-NEXT: v_dual_mov_b32 v18, s7 :: v_dual_mov_b32 v20, s4
-; GFX1250-NEXT: v_dual_mov_b32 v19, s24 :: v_dual_mov_b32 v21, s23
-; GFX1250-NEXT: s_ashr_i32 s18, s1, 31
-; GFX1250-NEXT: s_ashr_i32 s19, s0, 31
-; GFX1250-NEXT: v_dual_mov_b32 v22, s5 :: v_dual_mov_b32 v24, s2
-; GFX1250-NEXT: v_dual_mov_b32 v23, s22 :: v_dual_mov_b32 v25, s21
-; GFX1250-NEXT: v_mov_b32_e32 v27, s20
+; GFX1250-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v0, s14
+; GFX1250-NEXT: s_ashr_i32 s18, s15, 31
+; GFX1250-NEXT: s_ashr_i32 s19, s14, 31
+; GFX1250-NEXT: s_ashr_i32 s20, s13, 31
+; GFX1250-NEXT: s_ashr_i32 s21, s12, 31
+; GFX1250-NEXT: s_ashr_i32 s22, s11, 31
+; GFX1250-NEXT: s_ashr_i32 s23, s10, 31
+; GFX1250-NEXT: v_dual_mov_b32 v1, s19 :: v_dual_mov_b32 v2, s15
+; GFX1250-NEXT: v_dual_mov_b32 v3, s18 :: v_dual_mov_b32 v5, s21
+; GFX1250-NEXT: s_ashr_i32 s24, s9, 31
+; GFX1250-NEXT: s_ashr_i32 s25, s8, 31
+; GFX1250-NEXT: v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v8, s10
+; GFX1250-NEXT: v_dual_mov_b32 v6, s13 :: v_dual_mov_b32 v7, s20
+; GFX1250-NEXT: s_ashr_i32 s26, s7, 31
+; GFX1250-NEXT: s_ashr_i32 s27, s6, 31
+; GFX1250-NEXT: v_dual_mov_b32 v9, s23 :: v_dual_mov_b32 v10, s11
+; GFX1250-NEXT: v_dual_mov_b32 v11, s22 :: v_dual_mov_b32 v13, s25
+; GFX1250-NEXT: s_ashr_i32 s28, s5, 31
+; GFX1250-NEXT: s_ashr_i32 s29, s4, 31
+; GFX1250-NEXT: v_dual_mov_b32 v12, s8 :: v_dual_mov_b32 v16, s6
+; GFX1250-NEXT: v_dual_mov_b32 v14, s9 :: v_dual_mov_b32 v15, s24
+; GFX1250-NEXT: s_ashr_i32 s30, s3, 31
+; GFX1250-NEXT: s_ashr_i32 s31, s2, 31
+; GFX1250-NEXT: v_dual_mov_b32 v17, s27 :: v_dual_mov_b32 v18, s7
+; GFX1250-NEXT: v_mov_b32_e32 v19, s26
+; GFX1250-NEXT: s_ashr_i32 s33, s1, 31
+; GFX1250-NEXT: s_ashr_i32 s34, s0, 31
; GFX1250-NEXT: s_clause 0x1
-; GFX1250-NEXT: global_store_b128 v28, v[0:3], s[16:17] offset:112
-; GFX1250-NEXT: global_store_b128 v28, v[4:7], s[16:17] offset:96
+; GFX1250-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:112
+; GFX1250-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:96
; GFX1250-NEXT: s_wait_xcnt 0x1
-; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s19
-; GFX1250-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v3, s18
+; GFX1250-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s29
+; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v3, s28
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s31
+; GFX1250-NEXT: v_dual_mov_b32 v6, s3 :: v_dual_mov_b32 v7, s30
+; GFX1250-NEXT: v_dual_mov_b32 v20, s0 :: v_dual_mov_b32 v21, s34
+; GFX1250-NEXT: v_dual_mov_b32 v22, s1 :: v_dual_mov_b32 v23, s33
; GFX1250-NEXT: s_clause 0x5
-; GFX1250-NEXT: global_store_b128 v28, v[8:11], s[16:17] offset:80
-; GFX1250-NEXT: global_store_b128 v28, v[12:15], s[16:17] offset:64
-; GFX1250-NEXT: global_store_b128 v28, v[16:19], s[16:17] offset:48
-; GFX1250-NEXT: global_store_b128 v28, v[20:23], s[16:17] offset:32
-; GFX1250-NEXT: global_store_b128 v28, v[24:27], s[16:17] offset:16
-; GFX1250-NEXT: global_store_b128 v28, v[0:3], s[16:17]
+; GFX1250-NEXT: global_store_b128 v24, v[8:11], s[16:17] offset:80
+; GFX1250-NEXT: global_store_b128 v24, v[12:15], s[16:17] offset:64
+; GFX1250-NEXT: global_store_b128 v24, v[16:19], s[16:17] offset:48
+; GFX1250-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:32
+; GFX1250-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:16
+; GFX1250-NEXT: global_store_b128 v24, v[20:23], s[16:17]
; GFX1250-NEXT: s_endpgm
%ld = load <16 x i32>, ptr addrspace(4) %in
%ext = sext <16 x i32> %ld to <16 x i64>
@@ -4873,106 +4875,105 @@ define amdgpu_kernel void @constant_sextload_v32i32_to_v32i64(ptr addrspace(1) %
; GFX1250-NEXT: s_load_b128 s[36:39], s[4:5], 0x24
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_clause 0x1
-; GFX1250-NEXT: s_load_b512 s[0:15], s[38:39], 0x0
; GFX1250-NEXT: s_load_b512 s[16:31], s[38:39], 0x40
-; GFX1250-NEXT: v_mov_b32_e32 v24, 0
+; GFX1250-NEXT: s_load_b512 s[0:15], s[38:39], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: s_ashr_i32 s49, s15, 31
-; GFX1250-NEXT: s_ashr_i32 s64, s31, 31
-; GFX1250-NEXT: s_ashr_i32 s65, s30, 31
-; GFX1250-NEXT: s_ashr_i32 s62, s29, 31
-; GFX1250-NEXT: s_ashr_i32 s63, s28, 31
-; GFX1250-NEXT: s_ashr_i32 s60, s27, 31
-; GFX1250-NEXT: s_ashr_i32 s61, s26, 31
-; GFX1250-NEXT: v_dual_mov_b32 v0, s30 :: v_dual_mov_b32 v2, s31
-; GFX1250-NEXT: v_dual_mov_b32 v4, s28 :: v_dual_mov_b32 v1, s65
-; GFX1250-NEXT: v_mov_b32_e32 v3, s64
-; GFX1250-NEXT: s_ashr_i32 s58, s25, 31
-; GFX1250-NEXT: s_ashr_i32 s59, s24, 31
-; GFX1250-NEXT: v_dual_mov_b32 v6, s29 :: v_dual_mov_b32 v8, s26
-; GFX1250-NEXT: v_dual_mov_b32 v5, s63 :: v_dual_mov_b32 v7, s62
-; GFX1250-NEXT: v_dual_mov_b32 v9, s61 :: v_dual_mov_b32 v10, s27
-; GFX1250-NEXT: v_dual_mov_b32 v11, s60 :: v_dual_mov_b32 v12, s24
-; GFX1250-NEXT: s_ashr_i32 s57, s23, 31
-; GFX1250-NEXT: v_dual_mov_b32 v13, s59 :: v_dual_mov_b32 v14, s25
-; GFX1250-NEXT: v_mov_b32_e32 v15, s58
-; GFX1250-NEXT: s_ashr_i32 s24, s22, 31
-; GFX1250-NEXT: s_ashr_i32 s55, s21, 31
-; GFX1250-NEXT: s_ashr_i32 s56, s20, 31
-; GFX1250-NEXT: s_ashr_i32 s53, s19, 31
-; GFX1250-NEXT: s_ashr_i32 s54, s18, 31
+; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v2, s30
+; GFX1250-NEXT: s_ashr_i32 s51, s31, 31
+; GFX1250-NEXT: s_ashr_i32 s52, s30, 31
+; GFX1250-NEXT: s_ashr_i32 s53, s29, 31
+; GFX1250-NEXT: s_ashr_i32 s54, s28, 31
+; GFX1250-NEXT: s_ashr_i32 s55, s27, 31
+; GFX1250-NEXT: s_ashr_i32 s56, s26, 31
+; GFX1250-NEXT: v_dual_mov_b32 v3, s52 :: v_dual_mov_b32 v4, s31
+; GFX1250-NEXT: v_dual_mov_b32 v5, s51 :: v_dual_mov_b32 v6, s28
+; GFX1250-NEXT: s_ashr_i32 s57, s25, 31
+; GFX1250-NEXT: s_ashr_i32 s58, s24, 31
+; GFX1250-NEXT: v_dual_mov_b32 v7, s54 :: v_dual_mov_b32 v8, s29
+; GFX1250-NEXT: v_dual_mov_b32 v9, s53 :: v_dual_mov_b32 v10, s26
+; GFX1250-NEXT: v_dual_mov_b32 v11, s56 :: v_dual_mov_b32 v12, s27
+; GFX1250-NEXT: v_dual_mov_b32 v13, s55 :: v_dual_mov_b32 v14, s24
+; GFX1250-NEXT: s_ashr_i32 s59, s23, 31
+; GFX1250-NEXT: s_ashr_i32 s60, s22, 31
+; GFX1250-NEXT: v_dual_mov_b32 v15, s58 :: v_dual_mov_b32 v16, s25
+; GFX1250-NEXT: v_mov_b32_e32 v17, s57
+; GFX1250-NEXT: s_ashr_i32 s61, s21, 31
+; GFX1250-NEXT: s_ashr_i32 s62, s20, 31
+; GFX1250-NEXT: s_ashr_i32 s63, s19, 31
+; GFX1250-NEXT: s_ashr_i32 s64, s18, 31
; GFX1250-NEXT: s_clause 0x3
-; GFX1250-NEXT: global_store_b128 v24, v[0:3], s[36:37] offset:240
-; GFX1250-NEXT: global_store_b128 v24, v[4:7], s[36:37] offset:224
-; GFX1250-NEXT: global_store_b128 v24, v[8:11], s[36:37] offset:208
-; GFX1250-NEXT: global_store_b128 v24, v[12:15], s[36:37] offset:192
+; GFX1250-NEXT: global_store_b128 v0, v[2:5], s[36:37] offset:240
+; GFX1250-NEXT: global_store_b128 v0, v[6:9], s[36:37] offset:224
+; GFX1250-NEXT: global_store_b128 v0, v[10:13], s[36:37] offset:208
+; GFX1250-NEXT: global_store_b128 v0, v[14:17], s[36:37] offset:192
; GFX1250-NEXT: s_wait_xcnt 0x3
-; GFX1250-NEXT: v_dual_mov_b32 v0, s22 :: v_dual_mov_b32 v1, s24
-; GFX1250-NEXT: v_dual_mov_b32 v2, s23 :: v_dual_mov_b32 v3, s57
+; GFX1250-NEXT: v_dual_mov_b32 v2, s22 :: v_dual_mov_b32 v3, s60
+; GFX1250-NEXT: v_dual_mov_b32 v4, s23 :: v_dual_mov_b32 v5, s59
; GFX1250-NEXT: s_wait_xcnt 0x2
-; GFX1250-NEXT: v_mov_b32_e32 v4, s20
-; GFX1250-NEXT: s_ashr_i32 s51, s17, 31
-; GFX1250-NEXT: s_ashr_i32 s52, s16, 31
-; GFX1250-NEXT: v_dual_mov_b32 v5, s56 :: v_dual_mov_b32 v6, s21
+; GFX1250-NEXT: v_mov_b32_e32 v6, s20
+; GFX1250-NEXT: s_ashr_i32 s65, s17, 31
+; GFX1250-NEXT: s_ashr_i32 s24, s16, 31
+; GFX1250-NEXT: v_dual_mov_b32 v7, s62 :: v_dual_mov_b32 v8, s21
; GFX1250-NEXT: s_wait_xcnt 0x1
-; GFX1250-NEXT: v_dual_mov_b32 v7, s55 :: v_dual_mov_b32 v8, s18
-; GFX1250-NEXT: s_ashr_i32 s50, s14, 31
-; GFX1250-NEXT: v_dual_mov_b32 v9, s54 :: v_dual_mov_b32 v10, s19
+; GFX1250-NEXT: v_dual_mov_b32 v9, s61 :: v_dual_mov_b32 v10, s18
+; GFX1250-NEXT: s_ashr_i32 s33, s15, 31
+; GFX1250-NEXT: s_ashr_i32 s34, s14, 31
+; GFX1250-NEXT: v_dual_mov_b32 v11, s64 :: v_dual_mov_b32 v12, s19
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v11, s53 :: v_dual_mov_b32 v12, s16
-; GFX1250-NEXT: s_ashr_i32 s45, s11, 31
-; GFX1250-NEXT: s_ashr_i32 s46, s10, 31
-; GFX1250-NEXT: s_ashr_i32 s47, s13, 31
-; GFX1250-NEXT: s_ashr_i32 s48, s12, 31
-; GFX1250-NEXT: v_dual_mov_b32 v13, s52 :: v_dual_mov_b32 v14, s17
-; GFX1250-NEXT: v_dual_mov_b32 v15, s51 :: v_dual_mov_b32 v16, s14
-; GFX1250-NEXT: s_ashr_i32 s43, s9, 31
-; GFX1250-NEXT: s_ashr_i32 s44, s8, 31
-; GFX1250-NEXT: v_dual_mov_b32 v17, s50 :: v_dual_mov_b32 v18, s15
-; GFX1250-NEXT: v_dual_mov_b32 v19, s49 :: v_dual_mov_b32 v20, s12
-; GFX1250-NEXT: s_ashr_i32 s41, s7, 31
-; GFX1250-NEXT: s_ashr_i32 s42, s6, 31
-; GFX1250-NEXT: v_dual_mov_b32 v21, s48 :: v_dual_mov_b32 v22, s13
-; GFX1250-NEXT: v_mov_b32_e32 v23, s47
+; GFX1250-NEXT: v_dual_mov_b32 v13, s63 :: v_dual_mov_b32 v14, s16
+; GFX1250-NEXT: s_ashr_i32 s35, s13, 31
+; GFX1250-NEXT: s_ashr_i32 s38, s12, 31
+; GFX1250-NEXT: s_ashr_i32 s39, s11, 31
+; GFX1250-NEXT: s_ashr_i32 s40, s10, 31
+; GFX1250-NEXT: v_dual_mov_b32 v15, s24 :: v_dual_mov_b32 v16, s17
+; GFX1250-NEXT: v_dual_mov_b32 v17, s65 :: v_dual_mov_b32 v18, s14
+; GFX1250-NEXT: s_ashr_i32 s41, s9, 31
+; GFX1250-NEXT: s_ashr_i32 s42, s8, 31
+; GFX1250-NEXT: v_dual_mov_b32 v19, s34 :: v_dual_mov_b32 v20, s15
+; GFX1250-NEXT: v_dual_mov_b32 v21, s33 :: v_dual_mov_b32 v22, s12
+; GFX1250-NEXT: s_ashr_i32 s43, s7, 31
+; GFX1250-NEXT: s_ashr_i32 s44, s6, 31
+; GFX1250-NEXT: v_dual_mov_b32 v23, s38 :: v_dual_mov_b32 v24, s13
+; GFX1250-NEXT: v_mov_b32_e32 v25, s35
; GFX1250-NEXT: s_clause 0x5
-; GFX1250-NEXT: global_store_b128 v24, v[0:3], s[36:37] offset:176
-; GFX1250-NEXT: global_store_b128 v24, v[4:7], s[36:37] offset:160
-; GFX1250-NEXT: global_store_b128 v24, v[8:11], s[36:37] offset:144
-; GFX1250-NEXT: global_store_b128 v24, v[12:15], s[36:37] offset:128
-; GFX1250-NEXT: global_store_b128 v24, v[16:19], s[36:37] offset:112
-; GFX1250-NEXT: global_store_b128 v24, v[20:23], s[36:37] offset:96
+; GFX1250-NEXT: global_store_b128 v0, v[2:5], s[36:37] offset:176
+; GFX1250-NEXT: global_store_b128 v0, v[6:9], s[36:37] offset:160
+; GFX1250-NEXT: global_store_b128 v0, v[10:13], s[36:37] offset:144
+; GFX1250-NEXT: global_store_b128 v0, v[14:17], s[36:37] offset:128
+; GFX1250-NEXT: global_store_b128 v0, v[18:21], s[36:37] offset:112
+; GFX1250-NEXT: global_store_b128 v0, v[22:25], s[36:37] offset:96
; GFX1250-NEXT: s_wait_xcnt 0x5
-; GFX1250-NEXT: v_dual_mov_b32 v0, s10 :: v_dual_mov_b32 v1, s46
-; GFX1250-NEXT: v_dual_mov_b32 v2, s11 :: v_dual_mov_b32 v3, s45
+; GFX1250-NEXT: v_dual_mov_b32 v2, s10 :: v_dual_mov_b32 v3, s40
+; GFX1250-NEXT: v_dual_mov_b32 v4, s11 :: v_dual_mov_b32 v5, s39
; GFX1250-NEXT: s_wait_xcnt 0x4
-; GFX1250-NEXT: v_mov_b32_e32 v4, s8
-; GFX1250-NEXT: s_ashr_i32 s39, s5, 31
-; GFX1250-NEXT: s_ashr_i32 s40, s4, 31
-; GFX1250-NEXT: v_dual_mov_b32 v5, s44 :: v_dual_mov_b32 v6, s9
+; GFX1250-NEXT: v_mov_b32_e32 v6, s8
+; GFX1250-NEXT: s_ashr_i32 s45, s5, 31
+; GFX1250-NEXT: s_ashr_i32 s46, s4, 31
+; GFX1250-NEXT: v_dual_mov_b32 v7, s42 :: v_dual_mov_b32 v8, s9
; GFX1250-NEXT: s_wait_xcnt 0x3
-; GFX1250-NEXT: v_dual_mov_b32 v7, s43 :: v_dual_mov_b32 v8, s6
-; GFX1250-NEXT: s_ashr_i32 s35, s3, 31
-; GFX1250-NEXT: s_ashr_i32 s38, s2, 31
-; GFX1250-NEXT: v_dual_mov_b32 v9, s42 :: v_dual_mov_b32 v10, s7
+; GFX1250-NEXT: v_dual_mov_b32 v9, s41 :: v_dual_mov_b32 v10, s6
+; GFX1250-NEXT: s_ashr_i32 s47, s3, 31
+; GFX1250-NEXT: s_ashr_i32 s48, s2, 31
+; GFX1250-NEXT: v_dual_mov_b32 v11, s44 :: v_dual_mov_b32 v12, s7
; GFX1250-NEXT: s_wait_xcnt 0x2
-; GFX1250-NEXT: v_dual_mov_b32 v11, s41 :: v_dual_mov_b32 v12, s4
-; GFX1250-NEXT: s_ashr_i32 s33, s1, 31
-; GFX1250-NEXT: s_ashr_i32 s34, s0, 31
-; GFX1250-NEXT: v_dual_mov_b32 v13, s40 :: v_dual_mov_b32 v14, s5
+; GFX1250-NEXT: v_dual_mov_b32 v13, s43 :: v_dual_mov_b32 v14, s4
+; GFX1250-NEXT: s_ashr_i32 s49, s1, 31
+; GFX1250-NEXT: s_ashr_i32 s50, s0, 31
+; GFX1250-NEXT: v_dual_mov_b32 v15, s46 :: v_dual_mov_b32 v16, s5
; GFX1250-NEXT: s_wait_xcnt 0x1
-; GFX1250-NEXT: v_dual_mov_b32 v15, s39 :: v_dual_mov_b32 v16, s2
-; GFX1250-NEXT: v_dual_mov_b32 v17, s38 :: v_dual_mov_b32 v18, s3
+; GFX1250-NEXT: v_dual_mov_b32 v17, s45 :: v_dual_mov_b32 v18, s2
+; GFX1250-NEXT: v_dual_mov_b32 v19, s48 :: v_dual_mov_b32 v20, s3
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v19, s35 :: v_dual_mov_b32 v20, s0
-; GFX1250-NEXT: v_dual_mov_b32 v21, s34 :: v_dual_mov_b32 v22, s1
-; GFX1250-NEXT: v_mov_b32_e32 v23, s33
+; GFX1250-NEXT: v_dual_mov_b32 v21, s47 :: v_dual_mov_b32 v22, s0
+; GFX1250-NEXT: v_dual_mov_b32 v23, s50 :: v_dual_mov_b32 v24, s1
+; GFX1250-NEXT: v_mov_b32_e32 v25, s49
; GFX1250-NEXT: s_clause 0x5
-; GFX1250-NEXT: global_store_b128 v24, v[0:3], s[36:37] offset:80
-; GFX1250-NEXT: global_store_b128 v24, v[4:7], s[36:37] offset:64
-; GFX1250-NEXT: global_store_b128 v24, v[8:11], s[36:37] offset:48
-; GFX1250-NEXT: global_store_b128 v24, v[12:15], s[36:37] offset:32
-; GFX1250-NEXT: global_store_b128 v24, v[16:19], s[36:37] offset:16
-; GFX1250-NEXT: global_store_b128 v24, v[20:23], s[36:37]
+; GFX1250-NEXT: global_store_b128 v0, v[2:5], s[36:37] offset:80
+; GFX1250-NEXT: global_store_b128 v0, v[6:9], s[36:37] offset:64
+; GFX1250-NEXT: global_store_b128 v0, v[10:13], s[36:37] offset:48
+; GFX1250-NEXT: global_store_b128 v0, v[14:17], s[36:37] offset:32
+; GFX1250-NEXT: global_store_b128 v0, v[18:21], s[36:37] offset:16
+; GFX1250-NEXT: global_store_b128 v0, v[22:25], s[36:37]
; GFX1250-NEXT: s_endpgm
%ld = load <32 x i32>, ptr addrspace(4) %in
%ext = sext <32 x i32> %ld to <32 x i64>
diff --git a/llvm/test/CodeGen/AMDGPU/masked-load-vectortypes.ll b/llvm/test/CodeGen/AMDGPU/masked-load-vectortypes.ll
index deb97a9812b42..b884689051f5b 100644
--- a/llvm/test/CodeGen/AMDGPU/masked-load-vectortypes.ll
+++ b/llvm/test/CodeGen/AMDGPU/masked-load-vectortypes.ll
@@ -7,11 +7,11 @@ define <2 x i32> @uniform_masked_load_ptr1_mask_v2i32(ptr addrspace(1) inreg noc
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-NEXT: s_cbranch_execz .LBB0_2
; GFX942-NEXT: ; %bb.1: ; %cond.load
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1]
; GFX942-NEXT: .LBB0_2:
; GFX942-NEXT: s_or_b64 exec, exec, s[2:3]
@@ -30,13 +30,12 @@ define <4 x i32> @uniform_masked_load_ptr1_mask_v4i32(ptr addrspace(1) inreg noc
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-NEXT: s_cbranch_execz .LBB1_2
; GFX942-NEXT: ; %bb.1: ; %cond.load
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
; GFX942-NEXT: .LBB1_2:
; GFX942-NEXT: s_or_b64 exec, exec, s[2:3]
@@ -55,13 +54,12 @@ define <4 x float> @uniform_masked_load_ptr1_mask_v4f32(ptr addrspace(1) inreg n
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-NEXT: s_cbranch_execz .LBB2_2
; GFX942-NEXT: ; %bb.1: ; %cond.load
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
; GFX942-NEXT: .LBB2_2:
; GFX942-NEXT: s_or_b64 exec, exec, s[2:3]
@@ -80,20 +78,16 @@ define <8 x i32> @uniform_masked_load_ptr1_mask_v8i32(ptr addrspace(1) inreg noc
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-NEXT: s_cbranch_execz .LBB3_2
; GFX942-NEXT: ; %bb.1: ; %cond.load
-; GFX942-NEXT: global_load_dwordx4 v[4:7], v0, s[0:1] offset:16
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_load_dwordx4 v[4:7], v8, s[0:1] offset:16
+; GFX942-NEXT: global_load_dwordx4 v[0:3], v8, s[0:1]
; GFX942-NEXT: .LBB3_2:
; GFX942-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-NEXT: s_waitcnt vmcnt(0)
@@ -111,20 +105,16 @@ define <8 x float> @uniform_masked_load_ptr1_mask_v8f32(ptr addrspace(1) inreg n
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-NEXT: s_cbranch_execz .LBB4_2
; GFX942-NEXT: ; %bb.1: ; %cond.load
-; GFX942-NEXT: global_load_dwordx4 v[4:7], v0, s[0:1] offset:16
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_load_dwordx4 v[4:7], v8, s[0:1] offset:16
+; GFX942-NEXT: global_load_dwordx4 v[0:3], v8, s[0:1]
; GFX942-NEXT: .LBB4_2:
; GFX942-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-NEXT: s_waitcnt vmcnt(0)
@@ -142,13 +132,12 @@ define <8 x i16> @uniform_masked_load_ptr1_mask_v8i16(ptr addrspace(1) inreg noc
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-NEXT: s_cbranch_execz .LBB5_2
; GFX942-NEXT: ; %bb.1: ; %cond.load
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
; GFX942-NEXT: .LBB5_2:
; GFX942-NEXT: s_or_b64 exec, exec, s[2:3]
@@ -167,13 +156,12 @@ define <8 x half> @uniform_masked_load_ptr1_mask_v8f16(ptr addrspace(1) inreg no
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-NEXT: s_cbranch_execz .LBB6_2
; GFX942-NEXT: ; %bb.1: ; %cond.load
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
; GFX942-NEXT: .LBB6_2:
; GFX942-NEXT: s_or_b64 exec, exec, s[2:3]
@@ -192,13 +180,12 @@ define <8 x bfloat> @uniform_masked_load_ptr1_mask_v8bf16(ptr addrspace(1) inreg
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-NEXT: s_cbranch_execz .LBB7_2
; GFX942-NEXT: ; %bb.1: ; %cond.load
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
; GFX942-NEXT: .LBB7_2:
; GFX942-NEXT: s_or_b64 exec, exec, s[2:3]
diff --git a/llvm/test/CodeGen/AMDGPU/maximumnum.ll b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
index 4f73e8e9c1883..64c602f81cb23 100644
--- a/llvm/test/CodeGen/AMDGPU/maximumnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
@@ -5,11 +5,11 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-SDAG,GFX900-SDAG %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-GISEL,GFX900-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX900,GFX900-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX900,GFX900-GISEL %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-SDAG,GFX950-SDAG %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-GISEL,GFX950-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
@@ -62,21 +62,37 @@ define half @v_maximumnum_f16(half %x, half %y) {
; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f16:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f16:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f16:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f16:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f16:
; GFX10-SDAG: ; %bb.0:
@@ -211,11 +227,17 @@ define half @v_maximumnum_f16_nnan(half %x, half %y) {
; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f16_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f16_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f16_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f16_nnan:
; GFX10: ; %bb.0:
@@ -283,12 +305,19 @@ define half @v_maximumnum_f16_1.0(half %x) {
; GFX8-NEXT: v_max_f16_e32 v0, 1.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f16_1.0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f16_e32 v0, 1.0, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f16_1.0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f16_e32 v0, 1.0, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f16_1.0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f16_e32 v0, 1.0, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f16_1.0:
; GFX10: ; %bb.0:
@@ -373,21 +402,37 @@ define float @v_maximumnum_f32(float %x, float %y) {
; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f32:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f32:
; GFX10-SDAG: ; %bb.0:
@@ -461,11 +506,17 @@ define float @v_maximumnum_f32_nnan(float %x, float %y) {
; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f32_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f32_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f32_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_nnan:
; GFX10: ; %bb.0:
@@ -525,21 +576,37 @@ define double @v_maximumnum_f64(double %x, double %y) {
; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f64:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f64:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f64:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f64:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f64:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f64:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f64:
; GFX10-SDAG: ; %bb.0:
@@ -617,11 +684,17 @@ define double @v_maximumnum_f64_nnan(double %x, double %y) {
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f64_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f64_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f64_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f64_nnan:
; GFX10: ; %bb.0:
@@ -663,12 +736,19 @@ define float @v_maximumnum_f32_1.0(float %x) {
; GFX8-NEXT: v_max_f32_e32 v0, 1.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f32_1.0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v0, 1.0, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f32_1.0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f32_e32 v0, 1.0, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f32_1.0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f32_e32 v0, 1.0, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_1.0:
; GFX10: ; %bb.0:
@@ -717,13 +797,21 @@ define float @v_maximumnum_f32_rhs_not_snan(float %x, float %y) {
; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f32_rhs_not_snan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f32_rhs_not_snan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f32_rhs_not_snan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_rhs_not_snan:
; GFX10: ; %bb.0:
@@ -774,13 +862,21 @@ define float @v_maximumnum_f32_lhs_not_snan(float %x, float %y) {
; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f32_lhs_not_snan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f32_lhs_not_snan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f32_lhs_not_snan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_lhs_not_snan:
; GFX10: ; %bb.0:
@@ -831,13 +927,21 @@ define float @v_maximumnum_f32_both_operands_not_snan(float %x, float %y) {
; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f32_both_operands_not_snan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f32_both_operands_not_snan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f32_both_operands_not_snan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_both_operands_not_snan:
; GFX10: ; %bb.0:
@@ -887,12 +991,19 @@ define double @v_maximumnum_f64_1.0(double %x) {
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], 1.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f64_1.0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], 1.0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f64_1.0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], 1.0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f64_1.0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], 1.0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f64_1.0:
; GFX10: ; %bb.0:
@@ -2190,21 +2301,37 @@ define float @v_maximumnum_f32_fabs_rhs(float %x, float %y) {
; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f32_fabs_rhs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f32_fabs_rhs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f32_fabs_rhs:
; GFX10-SDAG: ; %bb.0:
@@ -2303,21 +2430,37 @@ define float @v_maximumnum_f32_fneg_fabs_rhs(float %x, float %y) {
; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
; GFX10-SDAG: ; %bb.0:
@@ -2417,21 +2560,37 @@ define float @v_maximumnum_f32_fabs(float %x, float %y) {
; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f32_fabs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0|
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f32_fabs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0|
-; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f32_fabs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX900-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f32_fabs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f32_fabs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX950-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f32_fabs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f32_fabs:
; GFX10-SDAG: ; %bb.0:
@@ -2531,21 +2690,37 @@ define float @v_maximumnum_f32_fneg(float %x, float %y) {
; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f32_fneg:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1
-; GFX9-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f32_fneg:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
-; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f32_fneg:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX900-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f32_fneg:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f32_fneg:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX950-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f32_fneg:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f32_fneg:
; GFX10-SDAG: ; %bb.0:
@@ -2648,21 +2823,37 @@ define half @v_maximumnum_f16_fabs_rhs(half %x, half %y) {
; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f16_fabs_rhs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f16_fabs_rhs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f16_fabs_rhs:
; GFX10-SDAG: ; %bb.0:
@@ -2808,21 +2999,37 @@ define half @v_maximumnum_f16_fneg_fabs_rhs(half %x, half %y) {
; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
; GFX10-SDAG: ; %bb.0:
@@ -2969,21 +3176,37 @@ define half @v_maximumnum_f16_fabs(half %x, half %y) {
; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f16_fabs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f16_fabs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
-; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f16_fabs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX900-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f16_fabs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f16_fabs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX950-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f16_fabs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f16_fabs:
; GFX10-SDAG: ; %bb.0:
@@ -3130,21 +3353,37 @@ define half @v_maximumnum_f16_fneg(half %x, half %y) {
; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f16_fneg:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX9-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f16_fneg:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f16_fneg:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX900-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f16_fneg:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f16_fneg:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX950-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f16_fneg:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f16_fneg:
; GFX10-SDAG: ; %bb.0:
@@ -3288,21 +3527,37 @@ define double @v_maximumnum_f64_fneg(double %x, double %y) {
; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f64_fneg:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
-; GFX9-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
-; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f64_fneg:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
-; GFX9-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
-; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f64_fneg:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f64_fneg:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f64_fneg:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f64_fneg:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f64_fneg:
; GFX10-SDAG: ; %bb.0:
@@ -3564,11 +3819,17 @@ define <2 x half> @v_maximumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v2f16_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v2f16_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v2f16_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v2f16_nnan:
; GFX10: ; %bb.0:
@@ -3663,16 +3924,16 @@ define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_v3f16:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX9-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_v3f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-GISEL-LABEL: v_maximumnum_v3f16:
; GFX900-GISEL: ; %bb.0:
@@ -3685,6 +3946,17 @@ define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) {
; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v2
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX950-SDAG-LABEL: v_maximumnum_v3f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
; GFX950-GISEL-LABEL: v_maximumnum_v3f16:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3834,19 +4106,33 @@ define <3 x half> @v_maximumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_v3f16_nnan:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_v3f16_nnan:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_v3f16_nnan:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v3f16_nnan:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-GISEL-LABEL: v_maximumnum_v3f16_nnan:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX950-GISEL-LABEL: v_maximumnum_v3f16_nnan:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v3f16_nnan:
; GFX10: ; %bb.0:
@@ -4157,12 +4443,19 @@ define <4 x half> @v_maximumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v4f16_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v4f16_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX900-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v4f16_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX950-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v4f16_nnan:
; GFX10: ; %bb.0:
@@ -6691,27 +6984,49 @@ define <2 x float> @v_maximumnum_v2f32(<2 x float> %x, <2 x float> %y) {
; GFX8-GISEL-NEXT: v_max_f32_e32 v1, v1, v2
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_v2f32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v3, v3
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_v2f32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v3, v3
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v2
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_v2f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_v2f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v2f32:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v2f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v2
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_v2f32:
; GFX10-SDAG: ; %bb.0:
@@ -6797,12 +7112,19 @@ define <2 x float> @v_maximumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) {
; GFX8-NEXT: v_max_f32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v2f32_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v2f32_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v2f32_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX950-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v2f32_nnan:
; GFX10: ; %bb.0:
@@ -6887,33 +7209,61 @@ define <3 x float> @v_maximumnum_v3f32(<3 x float> %x, <3 x float> %y) {
; GFX8-GISEL-NEXT: v_max_f32_e32 v2, v2, v3
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_v3f32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v3
-; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v4, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v5, v5
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v3
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_v3f32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v3
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v4, v4
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v5, v5
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v3
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_v3f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v3
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_v3f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v3
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v3f32:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v3
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v3f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v3
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_v3f32:
; GFX10-SDAG: ; %bb.0:
@@ -7015,13 +7365,21 @@ define <3 x float> @v_maximumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) {
; GFX8-NEXT: v_max_f32_e32 v2, v2, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v3f32_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v3
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v4
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v5
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v3f32_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX900-NEXT: v_max_f32_e32 v2, v2, v5
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v3f32_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX950-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX950-NEXT: v_max_f32_e32 v2, v2, v5
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v3f32_nnan:
; GFX10: ; %bb.0:
@@ -7121,39 +7479,73 @@ define <4 x float> @v_maximumnum_v4f32(<4 x float> %x, <4 x float> %y) {
; GFX8-GISEL-NEXT: v_max_f32_e32 v3, v3, v4
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_v4f32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v4, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v5, v5
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v6, v6
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v7, v7
-; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v4
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_v4f32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v5, v5
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v4
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v6, v6
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v4
-; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v7, v7
-; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v3, v4
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_v4f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v3, v4
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_v4f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v4
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v4f32:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v3, v4
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v4f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v4
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_v4f32:
; GFX10-SDAG: ; %bb.0:
@@ -7267,14 +7659,23 @@ define <4 x float> @v_maximumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) {
; GFX8-NEXT: v_max_f32_e32 v3, v3, v7
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v4f32_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v5
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v6
-; GFX9-NEXT: v_max_f32_e32 v3, v3, v7
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v4f32_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX900-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX900-NEXT: v_max_f32_e32 v3, v3, v7
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v4f32_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX950-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX950-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX950-NEXT: v_max_f32_e32 v3, v3, v7
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v4f32_nnan:
; GFX10: ; %bb.0:
@@ -7376,12 +7777,12 @@ define <2 x double> @v_maximumnum_v2f64(<2 x double> %x, <2 x double> %y) {
; GFX950-SDAG-LABEL: v_maximumnum_v2f64:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7]
-; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5]
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: v_maximumnum_v2f64:
@@ -7491,12 +7892,26 @@ define <2 x double> @v_maximumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) {
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v2f64_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v2f64_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v2f64_nnan:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v2f64_nnan:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v2f64_nnan:
; GFX10: ; %bb.0:
@@ -7614,15 +8029,15 @@ define <3 x double> @v_maximumnum_v3f64(<3 x double> %x, <3 x double> %y) {
; GFX950-SDAG-LABEL: v_maximumnum_v3f64:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9]
-; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
-; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[10:11], v[10:11]
-; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[6:7]
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: v_maximumnum_v3f64:
@@ -7755,13 +8170,29 @@ define <3 x double> @v_maximumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) {
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v3f64_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v3f64_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v3f64_nnan:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v3f64_nnan:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v3f64_nnan:
; GFX10: ; %bb.0:
@@ -7900,18 +8331,18 @@ define <4 x double> @v_maximumnum_v4f64(<4 x double> %x, <4 x double> %y) {
; GFX950-SDAG-LABEL: v_maximumnum_v4f64:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[10:11], v[10:11]
-; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[12:13], v[12:13]
-; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[8:9]
-; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[14:15], v[14:15]
-; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[8:9]
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: v_maximumnum_v4f64:
@@ -8067,14 +8498,32 @@ define <4 x double> @v_maximumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) {
; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v4f64_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
-; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v4f64_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v4f64_nnan:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v4f64_nnan:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v4f64_nnan:
; GFX10: ; %bb.0:
@@ -8136,11 +8585,17 @@ define half @v_maximumnum_f16_no_ieee(half %x, half %y) #0 {
; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f16_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f16_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f16_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f16_no_ieee:
; GFX10: ; %bb.0:
@@ -8241,11 +8696,17 @@ define half @v_maximumnum_f16_nan_no_ieee(half %x, half %y) #0 {
; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f16_nan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f16_nan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f16_nan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f16_nan_no_ieee:
; GFX10: ; %bb.0:
@@ -8301,11 +8762,17 @@ define float @v_maximumnum_f32_no_ieee(float %x, float %y) #0 {
; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f32_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f32_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f32_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_no_ieee:
; GFX10: ; %bb.0:
@@ -8359,11 +8826,17 @@ define float @v_maximumnum_f32_nnan_no_ieee(float %x, float %y) #0 {
; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f32_nnan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f32_nnan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f32_nnan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8403,11 +8876,17 @@ define double @v_maximumnum_f64_no_ieee(double %x, double %y) #0 {
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f64_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f64_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f64_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f64_no_ieee:
; GFX10: ; %bb.0:
@@ -8463,11 +8942,17 @@ define double @v_maximumnum_f64_nnan_no_ieee(double %x, double %y) #0 {
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f64_nnan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f64_nnan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f64_nnan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f64_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8539,11 +9024,17 @@ define <2 x half> @v_maximumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v2f16_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v2f16_no_ieee:
; GFX10: ; %bb.0:
@@ -8631,11 +9122,17 @@ define <2 x half> @v_maximumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 x half> %y)
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v2f16_nnan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v2f16_nnan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v2f16_nnan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v2f16_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8718,19 +9215,33 @@ define <3 x half> @v_maximumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y)
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-GISEL-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX950-GISEL-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8829,12 +9340,19 @@ define <4 x half> @v_maximumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y)
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v4f16_nnan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v4f16_nnan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX900-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v4f16_nnan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX950-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v4f16_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8865,6 +9383,3 @@ define <4 x half> @v_maximumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y)
}
attributes #0 = { "amdgpu-ieee"="false" }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX900: {{.*}}
-; GFX950: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
index 3b8efafba06f4..fef0adf3b5b32 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
@@ -2435,39 +2435,40 @@ define amdgpu_kernel void @test_mfma_nested_loop_zeroinit(ptr addrspace(1) %arg)
;
; GFX942-LABEL: test_mfma_nested_loop_zeroinit:
; GFX942: ; %bb.0: ; %entry
-; GFX942-NEXT: v_accvgpr_write_b32 a0, 0
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0
+; GFX942-NEXT: v_accvgpr_write_b32 a0, v0
+; GFX942-NEXT: v_accvgpr_write_b32 a3, v1
+; GFX942-NEXT: v_accvgpr_write_b32 a5, v1
+; GFX942-NEXT: v_accvgpr_write_b32 a7, v1
+; GFX942-NEXT: v_accvgpr_write_b32 a9, v1
+; GFX942-NEXT: v_accvgpr_write_b32 a11, v1
+; GFX942-NEXT: v_accvgpr_write_b32 a13, v1
+; GFX942-NEXT: v_accvgpr_write_b32 a15, v1
+; GFX942-NEXT: v_accvgpr_write_b32 a17, v1
+; GFX942-NEXT: v_accvgpr_write_b32 a19, v1
+; GFX942-NEXT: v_accvgpr_write_b32 a21, v1
+; GFX942-NEXT: v_accvgpr_write_b32 a23, v1
+; GFX942-NEXT: v_accvgpr_write_b32 a25, v1
+; GFX942-NEXT: v_accvgpr_write_b32 a27, v1
+; GFX942-NEXT: v_accvgpr_write_b32 a29, v1
+; GFX942-NEXT: v_accvgpr_write_b32 a31, v1
; GFX942-NEXT: s_mov_b32 s0, 0
-; GFX942-NEXT: v_accvgpr_mov_b32 a1, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a3, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a4, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a5, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a6, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a7, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a8, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a9, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a10, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a11, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a12, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a13, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a14, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a15, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a16, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a17, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a18, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a19, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a20, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a21, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a22, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a23, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a24, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a25, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a26, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a27, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a28, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a29, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a30, a0
-; GFX942-NEXT: v_accvgpr_mov_b32 a31, a0
+; GFX942-NEXT: v_accvgpr_write_b32 a1, v1
+; GFX942-NEXT: v_accvgpr_write_b32 a2, v0
+; GFX942-NEXT: v_accvgpr_write_b32 a4, v0
+; GFX942-NEXT: v_accvgpr_write_b32 a6, v0
+; GFX942-NEXT: v_accvgpr_write_b32 a8, v0
+; GFX942-NEXT: v_accvgpr_write_b32 a10, v0
+; GFX942-NEXT: v_accvgpr_write_b32 a12, v0
+; GFX942-NEXT: v_accvgpr_write_b32 a14, v0
+; GFX942-NEXT: v_accvgpr_write_b32 a16, v0
+; GFX942-NEXT: v_accvgpr_write_b32 a18, v0
+; GFX942-NEXT: v_accvgpr_write_b32 a20, v0
+; GFX942-NEXT: v_accvgpr_write_b32 a22, v0
+; GFX942-NEXT: v_accvgpr_write_b32 a24, v0
+; GFX942-NEXT: v_accvgpr_write_b32 a26, v0
+; GFX942-NEXT: v_accvgpr_write_b32 a28, v0
+; GFX942-NEXT: v_accvgpr_write_b32 a30, v0
; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
; GFX942-NEXT: .LBB9_1: ; %for.cond.preheader
diff --git a/llvm/test/CodeGen/AMDGPU/minimumnum.ll b/llvm/test/CodeGen/AMDGPU/minimumnum.ll
index 558006d2b6957..9c4a1ca797110 100644
--- a/llvm/test/CodeGen/AMDGPU/minimumnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/minimumnum.ll
@@ -5,11 +5,11 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-SDAG,GFX900-SDAG %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-GISEL,GFX900-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX900,GFX900-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX900,GFX900-GISEL %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-SDAG,GFX950-SDAG %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-GISEL,GFX950-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
@@ -62,21 +62,37 @@ define half @v_minimumnum_f16(half %x, half %y) {
; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f16:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f16:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f16:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f16:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f16:
; GFX10-SDAG: ; %bb.0:
@@ -211,11 +227,17 @@ define half @v_minimumnum_f16_nnan(half %x, half %y) {
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f16_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f16_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f16_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_nnan:
; GFX10: ; %bb.0:
@@ -283,12 +305,19 @@ define half @v_minimumnum_f16_1.0(half %x) {
; GFX8-NEXT: v_min_f16_e32 v0, 1.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f16_1.0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT: v_min_f16_e32 v0, 1.0, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f16_1.0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-NEXT: v_min_f16_e32 v0, 1.0, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f16_1.0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-NEXT: v_min_f16_e32 v0, 1.0, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_1.0:
; GFX10: ; %bb.0:
@@ -373,21 +402,37 @@ define float @v_minimumnum_f32(float %x, float %y) {
; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f32:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f32:
; GFX10-SDAG: ; %bb.0:
@@ -461,11 +506,17 @@ define float @v_minimumnum_f32_nnan(float %x, float %y) {
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f32_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f32_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f32_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_nnan:
; GFX10: ; %bb.0:
@@ -525,21 +576,37 @@ define double @v_minimumnum_f64(double %x, double %y) {
; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f64:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f64:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f64:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f64:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f64:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f64:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f64:
; GFX10-SDAG: ; %bb.0:
@@ -617,11 +684,17 @@ define double @v_minimumnum_f64_nnan(double %x, double %y) {
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f64_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f64_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f64_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_nnan:
; GFX10: ; %bb.0:
@@ -663,12 +736,19 @@ define float @v_minimumnum_f32_1.0(float %x) {
; GFX8-NEXT: v_min_f32_e32 v0, 1.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f32_1.0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_min_f32_e32 v0, 1.0, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f32_1.0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_min_f32_e32 v0, 1.0, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f32_1.0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_min_f32_e32 v0, 1.0, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_1.0:
; GFX10: ; %bb.0:
@@ -717,13 +797,21 @@ define float @v_minimumnum_f32_rhs_not_snan(float %x, float %y) {
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f32_rhs_not_snan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f32_rhs_not_snan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f32_rhs_not_snan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_rhs_not_snan:
; GFX10: ; %bb.0:
@@ -774,13 +862,21 @@ define float @v_minimumnum_f32_lhs_not_snan(float %x, float %y) {
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f32_lhs_not_snan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f32_lhs_not_snan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f32_lhs_not_snan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_lhs_not_snan:
; GFX10: ; %bb.0:
@@ -831,13 +927,21 @@ define float @v_minimumnum_f32_both_operands_not_snan(float %x, float %y) {
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f32_both_operands_not_snan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f32_both_operands_not_snan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f32_both_operands_not_snan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_both_operands_not_snan:
; GFX10: ; %bb.0:
@@ -887,12 +991,19 @@ define double @v_minimumnum_f64_1.0(double %x) {
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], 1.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f64_1.0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], 1.0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f64_1.0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], 1.0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f64_1.0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], 1.0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_1.0:
; GFX10: ; %bb.0:
@@ -2015,21 +2126,37 @@ define float @v_minimumnum_f32_fabs_rhs(float %x, float %y) {
; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f32_fabs_rhs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f32_fabs_rhs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f32_fabs_rhs:
; GFX10-SDAG: ; %bb.0:
@@ -2128,21 +2255,37 @@ define float @v_minimumnum_f32_fneg_fabs_rhs(float %x, float %y) {
; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
-; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
; GFX10-SDAG: ; %bb.0:
@@ -2242,21 +2385,37 @@ define float @v_minimumnum_f32_fabs(float %x, float %y) {
; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f32_fabs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0|
-; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f32_fabs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0|
-; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f32_fabs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX900-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f32_fabs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f32_fabs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX950-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f32_fabs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f32_fabs:
; GFX10-SDAG: ; %bb.0:
@@ -2356,21 +2515,37 @@ define float @v_minimumnum_f32_fneg(float %x, float %y) {
; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f32_fneg:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1
-; GFX9-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0
-; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f32_fneg:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
-; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
-; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f32_fneg:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX900-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f32_fneg:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f32_fneg:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX950-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f32_fneg:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f32_fneg:
; GFX10-SDAG: ; %bb.0:
@@ -2473,21 +2648,37 @@ define half @v_minimumnum_f16_fabs_rhs(half %x, half %y) {
; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f16_fabs_rhs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f16_fabs_rhs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f16_fabs_rhs:
; GFX10-SDAG: ; %bb.0:
@@ -2633,21 +2824,37 @@ define half @v_minimumnum_f16_fneg_fabs_rhs(half %x, half %y) {
; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
-; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
; GFX10-SDAG: ; %bb.0:
@@ -2794,21 +3001,37 @@ define half @v_minimumnum_f16_fabs(half %x, half %y) {
; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f16_fabs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
-; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f16_fabs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
-; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f16_fabs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX900-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f16_fabs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f16_fabs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX950-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f16_fabs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f16_fabs:
; GFX10-SDAG: ; %bb.0:
@@ -2955,21 +3178,37 @@ define half @v_minimumnum_f16_fneg(half %x, half %y) {
; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f16_fneg:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX9-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f16_fneg:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f16_fneg:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX900-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f16_fneg:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f16_fneg:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX950-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f16_fneg:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f16_fneg:
; GFX10-SDAG: ; %bb.0:
@@ -3113,21 +3352,37 @@ define double @v_minimumnum_f64_fneg(double %x, double %y) {
; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f64_fneg:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
-; GFX9-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
-; GFX9-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f64_fneg:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
-; GFX9-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
-; GFX9-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f64_fneg:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f64_fneg:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f64_fneg:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f64_fneg:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f64_fneg:
; GFX10-SDAG: ; %bb.0:
@@ -3389,11 +3644,17 @@ define <2 x half> @v_minimumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v2f16_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v2f16_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v2f16_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f16_nnan:
; GFX10: ; %bb.0:
@@ -3488,16 +3749,16 @@ define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_v3f16:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX9-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX9-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_v3f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-GISEL-LABEL: v_minimumnum_v3f16:
; GFX900-GISEL: ; %bb.0:
@@ -3510,6 +3771,17 @@ define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) {
; GFX900-GISEL-NEXT: v_pk_min_f16 v1, v1, v2
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX950-SDAG-LABEL: v_minimumnum_v3f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
; GFX950-GISEL-LABEL: v_minimumnum_v3f16:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3659,19 +3931,33 @@ define <3 x half> @v_minimumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_v3f16_nnan:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_v3f16_nnan:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_v3f16_nnan:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX900-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v3f16_nnan:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-GISEL-LABEL: v_minimumnum_v3f16_nnan:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX9-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX950-GISEL-LABEL: v_minimumnum_v3f16_nnan:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX950-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f16_nnan:
; GFX10: ; %bb.0:
@@ -3982,12 +4268,19 @@ define <4 x half> @v_minimumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v4f16_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v4f16_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX900-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v4f16_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX950-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f16_nnan:
; GFX10: ; %bb.0:
@@ -6516,27 +6809,49 @@ define <2 x float> @v_minimumnum_v2f32(<2 x float> %x, <2 x float> %y) {
; GFX8-GISEL-NEXT: v_min_f32_e32 v1, v1, v2
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_v2f32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v3, v3
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_min_f32_e32 v1, v1, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_v2f32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v3, v3
-; GFX9-GISEL-NEXT: v_min_f32_e32 v1, v1, v2
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_v2f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_min_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_v2f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX900-GISEL-NEXT: v_min_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v2f32:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_min_f32_e32 v1, v1, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v2f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX950-GISEL-NEXT: v_min_f32_e32 v1, v1, v2
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_v2f32:
; GFX10-SDAG: ; %bb.0:
@@ -6622,12 +6937,19 @@ define <2 x float> @v_minimumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) {
; GFX8-NEXT: v_min_f32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v2f32_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v2f32_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX900-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v2f32_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX950-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f32_nnan:
; GFX10: ; %bb.0:
@@ -6712,33 +7034,61 @@ define <3 x float> @v_minimumnum_v3f32(<3 x float> %x, <3 x float> %y) {
; GFX8-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_v3f32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v3
-; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v4, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v5, v5
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-SDAG-NEXT: v_min_f32_e32 v2, v2, v3
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_v3f32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v3
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v4, v4
-; GFX9-GISEL-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v5, v5
-; GFX9-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_v3f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-SDAG-NEXT: v_min_f32_e32 v2, v2, v3
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_v3f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX900-GISEL-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX900-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v3f32:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-SDAG-NEXT: v_min_f32_e32 v2, v2, v3
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v3f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX950-GISEL-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX950-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_v3f32:
; GFX10-SDAG: ; %bb.0:
@@ -6840,13 +7190,21 @@ define <3 x float> @v_minimumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) {
; GFX8-NEXT: v_min_f32_e32 v2, v2, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v3f32_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v3
-; GFX9-NEXT: v_min_f32_e32 v1, v1, v4
-; GFX9-NEXT: v_min_f32_e32 v2, v2, v5
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v3f32_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX900-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX900-NEXT: v_min_f32_e32 v2, v2, v5
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v3f32_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX950-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX950-NEXT: v_min_f32_e32 v2, v2, v5
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f32_nnan:
; GFX10: ; %bb.0:
@@ -6946,39 +7304,73 @@ define <4 x float> @v_minimumnum_v4f32(<4 x float> %x, <4 x float> %y) {
; GFX8-GISEL-NEXT: v_min_f32_e32 v3, v3, v4
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_v4f32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v4, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v5, v5
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_min_f32_e32 v1, v1, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v6, v6
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-SDAG-NEXT: v_min_f32_e32 v2, v2, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v7, v7
-; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-SDAG-NEXT: v_min_f32_e32 v3, v3, v4
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_v4f32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
-; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v5, v5
-; GFX9-GISEL-NEXT: v_min_f32_e32 v1, v1, v4
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v6, v6
-; GFX9-GISEL-NEXT: v_min_f32_e32 v2, v2, v4
-; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v7, v7
-; GFX9-GISEL-NEXT: v_min_f32_e32 v3, v3, v4
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_v4f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-SDAG-NEXT: v_min_f32_e32 v2, v2, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-SDAG-NEXT: v_min_f32_e32 v3, v3, v4
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_v4f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX900-GISEL-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX900-GISEL-NEXT: v_min_f32_e32 v2, v2, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX900-GISEL-NEXT: v_min_f32_e32 v3, v3, v4
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v4f32:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-SDAG-NEXT: v_min_f32_e32 v2, v2, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX950-SDAG-NEXT: v_min_f32_e32 v3, v3, v4
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v4f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX950-GISEL-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX950-GISEL-NEXT: v_min_f32_e32 v2, v2, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX950-GISEL-NEXT: v_min_f32_e32 v3, v3, v4
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_v4f32:
; GFX10-SDAG: ; %bb.0:
@@ -7092,14 +7484,23 @@ define <4 x float> @v_minimumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) {
; GFX8-NEXT: v_min_f32_e32 v3, v3, v7
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v4f32_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX9-NEXT: v_min_f32_e32 v1, v1, v5
-; GFX9-NEXT: v_min_f32_e32 v2, v2, v6
-; GFX9-NEXT: v_min_f32_e32 v3, v3, v7
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v4f32_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX900-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX900-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX900-NEXT: v_min_f32_e32 v3, v3, v7
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v4f32_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX950-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX950-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX950-NEXT: v_min_f32_e32 v3, v3, v7
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f32_nnan:
; GFX10: ; %bb.0:
@@ -7201,12 +7602,12 @@ define <2 x double> @v_minimumnum_v2f64(<2 x double> %x, <2 x double> %y) {
; GFX950-SDAG-LABEL: v_minimumnum_v2f64:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7]
-; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: v_minimumnum_v2f64:
@@ -7316,12 +7717,26 @@ define <2 x double> @v_minimumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) {
; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v2f64_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v2f64_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v2f64_nnan:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v2f64_nnan:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f64_nnan:
; GFX10: ; %bb.0:
@@ -7439,15 +7854,15 @@ define <3 x double> @v_minimumnum_v3f64(<3 x double> %x, <3 x double> %y) {
; GFX950-SDAG-LABEL: v_minimumnum_v3f64:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9]
-; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
-; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[10:11], v[10:11]
-; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[6:7]
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: v_minimumnum_v3f64:
@@ -7580,13 +7995,29 @@ define <3 x double> @v_minimumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) {
; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v3f64_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v3f64_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v3f64_nnan:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v3f64_nnan:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f64_nnan:
; GFX10: ; %bb.0:
@@ -7725,18 +8156,18 @@ define <4 x double> @v_minimumnum_v4f64(<4 x double> %x, <4 x double> %y) {
; GFX950-SDAG-LABEL: v_minimumnum_v4f64:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[10:11], v[10:11]
-; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[12:13], v[12:13]
-; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[8:9]
-; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[14:15], v[14:15]
-; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX950-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[8:9]
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: v_minimumnum_v4f64:
@@ -7892,14 +8323,32 @@ define <4 x double> @v_minimumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) {
; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v4f64_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
-; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
-; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v4f64_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX900-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v4f64_nnan:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v4f64_nnan:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX950-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f64_nnan:
; GFX10: ; %bb.0:
@@ -7961,11 +8410,17 @@ define half @v_minimumnum_f16_no_ieee(half %x, half %y) #0 {
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f16_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f16_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f16_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_no_ieee:
; GFX10: ; %bb.0:
@@ -8066,11 +8521,17 @@ define half @v_minimumnum_f16_nan_no_ieee(half %x, half %y) #0 {
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f16_nan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f16_nan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f16_nan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_nan_no_ieee:
; GFX10: ; %bb.0:
@@ -8126,11 +8587,17 @@ define float @v_minimumnum_f32_no_ieee(float %x, float %y) #0 {
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f32_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f32_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f32_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_no_ieee:
; GFX10: ; %bb.0:
@@ -8184,11 +8651,17 @@ define float @v_minimumnum_f32_nnan_no_ieee(float %x, float %y) #0 {
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f32_nnan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f32_nnan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f32_nnan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8228,11 +8701,17 @@ define double @v_minimumnum_f64_no_ieee(double %x, double %y) #0 {
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f64_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f64_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f64_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_no_ieee:
; GFX10: ; %bb.0:
@@ -8288,11 +8767,17 @@ define double @v_minimumnum_f64_nnan_no_ieee(double %x, double %y) #0 {
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f64_nnan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f64_nnan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f64_nnan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8364,11 +8849,17 @@ define <2 x half> @v_minimumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v2f16_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f16_no_ieee:
; GFX10: ; %bb.0:
@@ -8456,11 +8947,17 @@ define <2 x half> @v_minimumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 x half> %y)
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8543,19 +9040,33 @@ define <3 x half> @v_minimumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y)
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX900-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-GISEL-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX9-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX950-GISEL-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX950-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8654,12 +9165,19 @@ define <4 x half> @v_minimumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y)
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX900-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX950-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8690,6 +9208,3 @@ define <4 x half> @v_minimumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y)
}
attributes #0 = { "amdgpu-ieee"="false" }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX900: {{.*}}
-; GFX950: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/mul.ll b/llvm/test/CodeGen/AMDGPU/mul.ll
index 7e3d5c97391e1..57e1b87a01063 100644
--- a/llvm/test/CodeGen/AMDGPU/mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul.ll
@@ -3232,29 +3232,28 @@ define amdgpu_kernel void @s_mul_i128(ptr addrspace(1) %out, [8 x i32], i128 %a,
; GFX1250-NEXT: s_mov_b32 s2, s8
; GFX1250-NEXT: s_and_b64 s[4:5], s[12:13], s[4:5]
; GFX1250-NEXT: s_mov_b32 s6, s13
-; GFX1250-NEXT: s_mul_u64 s[10:11], s[10:11], s[12:13]
-; GFX1250-NEXT: s_mul_u64 s[12:13], s[4:5], s[2:3]
+; GFX1250-NEXT: s_mul_u64 s[22:23], s[4:5], s[2:3]
+; GFX1250-NEXT: s_mul_u64 s[24:25], s[6:7], s[2:3]
+; GFX1250-NEXT: s_mov_b32 s2, s23
; GFX1250-NEXT: s_mov_b32 s16, s9
; GFX1250-NEXT: s_mul_u64 s[8:9], s[8:9], s[14:15]
-; GFX1250-NEXT: s_mul_u64 s[14:15], s[6:7], s[2:3]
-; GFX1250-NEXT: s_mov_b32 s2, s13
+; GFX1250-NEXT: s_add_nc_u64 s[14:15], s[24:25], s[2:3]
; GFX1250-NEXT: s_mul_u64 s[4:5], s[4:5], s[16:17]
-; GFX1250-NEXT: s_add_nc_u64 s[14:15], s[14:15], s[2:3]
-; GFX1250-NEXT: s_mul_u64 s[6:7], s[6:7], s[16:17]
; GFX1250-NEXT: s_mov_b32 s2, s15
; GFX1250-NEXT: s_mov_b32 s15, s3
-; GFX1250-NEXT: s_mov_b32 s13, s3
+; GFX1250-NEXT: s_mul_u64 s[10:11], s[10:11], s[12:13]
; GFX1250-NEXT: s_add_nc_u64 s[4:5], s[4:5], s[14:15]
-; GFX1250-NEXT: s_add_nc_u64 s[8:9], s[10:11], s[8:9]
+; GFX1250-NEXT: s_mul_u64 s[6:7], s[6:7], s[16:17]
; GFX1250-NEXT: s_mov_b32 s18, s5
-; GFX1250-NEXT: s_mov_b32 s21, s4
+; GFX1250-NEXT: s_mov_b32 s23, s3
; GFX1250-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[18:19]
-; GFX1250-NEXT: s_or_b64 s[4:5], s[12:13], s[20:21]
+; GFX1250-NEXT: s_mov_b32 s21, s4
; GFX1250-NEXT: s_add_nc_u64 s[2:3], s[6:7], s[2:3]
-; GFX1250-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
-; GFX1250-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[8:9]
-; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1250-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX1250-NEXT: s_add_nc_u64 s[4:5], s[10:11], s[8:9]
+; GFX1250-NEXT: s_or_b64 s[6:7], s[22:23], s[20:21]
+; GFX1250-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[4:5]
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX1250-NEXT: s_mov_b32 s3, 0x31016000
; GFX1250-NEXT: s_mov_b32 s2, -1
; GFX1250-NEXT: buffer_store_b128 v[0:3], off, s[0:3], null
diff --git a/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll b/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll
index cf244f0b1f884..cba06c17b51ef 100644
--- a/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll
+++ b/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll
@@ -7,34 +7,32 @@ define amdgpu_kernel void @matmul_kernel(i32 %a0, i32 %a1) {
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX942-NEXT: v_accvgpr_write_b32 a2, 0
-; GFX942-NEXT: s_mov_b32 s2, 0
+; GFX942-NEXT: s_mov_b32 s4, 0
; GFX942-NEXT: v_accvgpr_write_b32 a1, 0
-; GFX942-NEXT: s_mov_b32 s3, 0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GFX942-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX942-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v0
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0
; GFX942-NEXT: s_branch .LBB0_2
; GFX942-NEXT: .LBB0_1: ; %bb2
; GFX942-NEXT: ; in Loop: Header=BB0_2 Depth=1
-; GFX942-NEXT: s_or_b32 s4, s3, 1
-; GFX942-NEXT: s_ashr_i32 s5, s3, 31
-; GFX942-NEXT: s_mov_b32 s3, s2
-; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX942-NEXT: s_nop 2
; GFX942-NEXT: v_accvgpr_mov_b32 a0, a2
; GFX942-NEXT: v_accvgpr_mov_b32 a2, a1
; GFX942-NEXT: v_accvgpr_mov_b32 a3, a1
-; GFX942-NEXT: s_and_b32 s3, s5, s4
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mfma_f32_16x16x16_f16 a[2:5], v[2:3], v[2:3], a[0:3]
+; GFX942-NEXT: s_or_b32 s2, s4, 1
+; GFX942-NEXT: s_ashr_i32 s3, s4, 31
+; GFX942-NEXT: v_mfma_f32_16x16x16_f16 a[2:5], v[0:1], v[0:1], a[0:3]
+; GFX942-NEXT: s_and_b32 s4, s3, s2
; GFX942-NEXT: s_cbranch_execz .LBB0_4
; GFX942-NEXT: .LBB0_2: ; %bb
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_and_b64 vcc, exec, s[0:1]
; GFX942-NEXT: s_cbranch_vccz .LBB0_1
; GFX942-NEXT: ; %bb.3:
-; GFX942-NEXT: ; implicit-def: $sgpr3
+; GFX942-NEXT: ; implicit-def: $sgpr4
; GFX942-NEXT: ; implicit-def: $agpr2
; GFX942-NEXT: .LBB0_4: ; %common.ret
; GFX942-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
index 9f27e1ffd9130..788fe0474738e 100644
--- a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
+++ b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
@@ -715,18 +715,43 @@ define amdgpu_kernel void @fadd_v2_v_lit_hi0(ptr addrspace(1) %a) {
; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX900-NEXT: s_endpgm
;
-; PACKED-LABEL: fadd_v2_v_lit_hi0:
-; PACKED: ; %bb.0:
-; PACKED-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; PACKED-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; PACKED-NEXT: v_lshlrev_b32_e32 v2, 3, v0
-; PACKED-NEXT: s_mov_b64 s[2:3], 0x3f800000
-; PACKED-NEXT: s_waitcnt lgkmcnt(0)
-; PACKED-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
-; PACKED-NEXT: s_waitcnt vmcnt(0)
-; PACKED-NEXT: v_pk_add_f32 v[0:1], v[0:1], s[2:3]
-; PACKED-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; PACKED-NEXT: s_endpgm
+; GFX90A-SDAG-LABEL: fadd_v2_v_lit_hi0:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX90A-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX90A-SDAG-NEXT: v_lshlrev_b32_e32 v2, 3, v0
+; GFX90A-SDAG-NEXT: s_mov_b64 s[2:3], 0x3f800000
+; GFX90A-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: v_pk_add_f32 v[0:1], v[0:1], s[2:3]
+; GFX90A-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX90A-SDAG-NEXT: s_endpgm
+;
+; PACKED-GISEL-LABEL: fadd_v2_v_lit_hi0:
+; PACKED-GISEL: ; %bb.0:
+; PACKED-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; PACKED-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; PACKED-GISEL-NEXT: v_lshlrev_b32_e32 v2, 3, v0
+; PACKED-GISEL-NEXT: s_mov_b64 s[2:3], 0x3f800000
+; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; PACKED-GISEL-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
+; PACKED-GISEL-NEXT: s_waitcnt vmcnt(0)
+; PACKED-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], s[2:3]
+; PACKED-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; PACKED-GISEL-NEXT: s_endpgm
+;
+; GFX942-SDAG-LABEL: fadd_v2_v_lit_hi0:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-SDAG-NEXT: v_lshlrev_b32_e32 v2, 3, v0
+; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: v_pk_add_f32 v[0:1], v[0:1], 1.0
+; GFX942-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-SDAG-NEXT: s_endpgm
;
; GFX1250-SDAG-LABEL: fadd_v2_v_lit_hi0:
; GFX1250-SDAG: ; %bb.0:
@@ -3780,6 +3805,3 @@ declare i32 @llvm.amdgcn.workitem.id.x()
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <32 x float> @llvm.fma.v32f32(<32 x float>, <32 x float>, <32 x float>)
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX90A-SDAG: {{.*}}
-; GFX942-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
index 4d367ef7ffd9d..a0b49f47dac8c 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
@@ -1215,10 +1215,8 @@ define amdgpu_kernel void @fp128_kernel_preload_arg(ptr addrspace(1) inreg %out,
; GFX942-NEXT: ; %bb.2:
; GFX942-NEXT: .LBB25_0:
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, s6
-; GFX942-NEXT: v_mov_b32_e32 v1, s7
-; GFX942-NEXT: v_mov_b32_e32 v2, s8
-; GFX942-NEXT: v_mov_b32_e32 v3, s9
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[8:9]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
; GFX942-NEXT: s_endpgm
;
@@ -1240,9 +1238,9 @@ define amdgpu_kernel void @fp128_kernel_preload_arg(ptr addrspace(1) inreg %out,
;
; GFX1250-LABEL: fp128_kernel_preload_arg:
; GFX1250: ; %bb.0:
-; GFX1250-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v0, s6
-; GFX1250-NEXT: v_dual_mov_b32 v1, s7 :: v_dual_mov_b32 v2, s8
-; GFX1250-NEXT: v_mov_b32_e32 v3, s9
+; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
+; GFX1250-NEXT: v_mov_b64_e32 v[2:3], s[8:9]
+; GFX1250-NEXT: v_mov_b32_e32 v4, 0
; GFX1250-NEXT: global_store_b128 v4, v[0:3], s[2:3]
; GFX1250-NEXT: s_endpgm
store fp128 %in, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
index b9e9893ede4e2..8c7fe50d4dade 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
@@ -367,9 +367,7 @@ bb:
define amdgpu_kernel void @illegal_mfma_after_rewrite() #1 {
; CHECK-LABEL: illegal_mfma_after_rewrite:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: s_mov_b32 s0, 0
-; CHECK-NEXT: s_mov_b32 s1, s0
-; CHECK-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
+; CHECK-NEXT: v_mov_b64_e32 v[8:9], 0
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s[0:3]
; CHECK-NEXT: ;;#ASMEND
@@ -402,49 +400,48 @@ define amdgpu_kernel void @illegal_mfma_after_rewrite() #1 {
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[0:3], v[8:9], v[12:13], v[4:7]
+; CHECK-NEXT: s_nop 2
+; CHECK-NEXT: v_cvt_f16_f32_e32 v14, v14
+; CHECK-NEXT: global_store_short v[8:9], v14, off
; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[26:29], v[8:9], v[8:9], v[4:7]
+; CHECK-NEXT: buffer_wbl2 sc0 sc1
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: buffer_inv sc0 sc1
; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[0:3], v[8:9], v[8:9], v[0:3]
-; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[22:25], v[8:9], v[8:9], v[22:25]
; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[4:7], v[8:9], v[8:9], v[26:29]
-; CHECK-NEXT: s_nop 5
-; CHECK-NEXT: v_cvt_f16_f32_e32 v23, v14
-; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[14:17], v[8:9], v[8:9], v[18:21]
+; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[22:25], v[8:9], v[8:9], v[22:25]
; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[0:3], v[12:13], v[8:9], v[0:3]
-; CHECK-NEXT: s_nop 1
-; CHECK-NEXT: v_accvgpr_read_b32 v19, a3
-; CHECK-NEXT: v_accvgpr_read_b32 v18, a2
-; CHECK-NEXT: v_mov_b64_e32 v[20:21], 0
-; CHECK-NEXT: s_nop 0
-; CHECK-NEXT: v_accvgpr_read_b32 v17, a1
-; CHECK-NEXT: v_accvgpr_read_b32 v16, a0
-; CHECK-NEXT: v_cvt_f16_f32_e32 v15, v22
-; CHECK-NEXT: v_cvt_f16_f32_e32 v14, v14
-; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[16:19], v[8:9], v[8:9], v[16:19]
+; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[16:19], v[8:9], v[8:9], v[18:21]
+; CHECK-NEXT: s_nop 4
+; CHECK-NEXT: v_cvt_f16_f32_e32 v14, v22
+; CHECK-NEXT: global_store_short v[8:9], v14, off
+; CHECK-NEXT: v_accvgpr_read_b32 v21, a3
+; CHECK-NEXT: v_accvgpr_read_b32 v20, a2
+; CHECK-NEXT: v_accvgpr_read_b32 v19, a1
+; CHECK-NEXT: v_accvgpr_read_b32 v18, a0
; CHECK-NEXT: v_cvt_f16_f32_e32 v12, v0
-; CHECK-NEXT: global_store_short v[20:21], v23, off
-; CHECK-NEXT: buffer_wbl2 sc0 sc1
-; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: buffer_inv sc0 sc1
; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[0:3], v[10:11], v[8:9], v[4:7]
-; CHECK-NEXT: global_store_short v[20:21], v15, off
+; CHECK-NEXT: v_cvt_f16_f32_e32 v15, v16
; CHECK-NEXT: buffer_wbl2 sc0 sc1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_inv sc0 sc1
-; CHECK-NEXT: global_store_short v[20:21], v14, off
-; CHECK-NEXT: v_cvt_f16_f32_e32 v14, v16
+; CHECK-NEXT: global_store_short v[8:9], v15, off
+; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[18:21], v[8:9], v[8:9], v[18:21]
; CHECK-NEXT: buffer_wbl2 sc0 sc1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_inv sc0 sc1
-; CHECK-NEXT: global_store_short v[20:21], v14, off
; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
+; CHECK-NEXT: s_nop 2
+; CHECK-NEXT: v_cvt_f16_f32_e32 v14, v18
+; CHECK-NEXT: global_store_short v[8:9], v14, off
; CHECK-NEXT: buffer_wbl2 sc0 sc1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_inv sc0 sc1
-; CHECK-NEXT: global_store_short v[20:21], v12, off
+; CHECK-NEXT: global_store_short v[8:9], v12, off
; CHECK-NEXT: buffer_wbl2 sc0 sc1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_inv sc0 sc1
-; CHECK-NEXT: global_store_short v[20:21], v0, off
+; CHECK-NEXT: global_store_short v[8:9], v0, off
; CHECK-NEXT: s_endpgm
entry:
%k0 = call <4 x float> asm sideeffect "; def $0", "=s"()
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-to-vreg1-copy.ll b/llvm/test/CodeGen/AMDGPU/sgpr-to-vreg1-copy.ll
index 192bd2073886a..5a7f7fb00f04e 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-to-vreg1-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-to-vreg1-copy.ll
@@ -18,12 +18,11 @@ define amdgpu_kernel void @copy_to_vreg_1(i32 %0) {
; GCN-NEXT: s_addc_u32 s0, 1, 0
; GCN-NEXT: v_readfirstlane_b32 s2, v1
; GCN-NEXT: s_cmp_ge_u32 s3, s4
-; GCN-NEXT: s_cselect_b32 s4, s0, s2
; GCN-NEXT: v_mov_b32_e32 v1, 0
-; GCN-NEXT: s_cmp_lg_u64 0, 0
+; GCN-NEXT: s_cselect_b32 s4, s0, s2
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; GCN-NEXT: s_cselect_b64 s[2:3], -1, 0
+; GCN-NEXT: s_mov_b64 s[2:3], 0
; GCN-NEXT: s_branch .LBB0_3
; GCN-NEXT: .LBB0_1: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB0_3 Depth=1
@@ -33,9 +32,9 @@ define amdgpu_kernel void @copy_to_vreg_1(i32 %0) {
; GCN-NEXT: ; in Loop: Header=BB0_3 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: s_and_b64 s[4:5], exec, s[8:9]
-; GCN-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
+; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
; GCN-NEXT: s_mov_b32 s4, 0
-; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
+; GCN-NEXT: s_andn2_b64 exec, exec, s[2:3]
; GCN-NEXT: s_cbranch_execz .LBB0_8
; GCN-NEXT: .LBB0_3: ; %.lr.ph27
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -57,7 +56,7 @@ define amdgpu_kernel void @copy_to_vreg_1(i32 %0) {
; GCN-NEXT: s_cbranch_execz .LBB0_2
; GCN-NEXT: ; %bb.6: ; %pred.store.continue
; GCN-NEXT: ; in Loop: Header=BB0_3 Depth=1
-; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[2:3]
+; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[0:1]
; GCN-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
; GCN-NEXT: s_cbranch_execz .LBB0_1
; GCN-NEXT: ; %bb.7: ; %pred.store.if41
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll
index 51dc9a51ec9d0..3fd7f1cb481a4 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll
@@ -88,8 +88,7 @@ define void @v_shuffle_v2i64_v2i64__1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -144,8 +143,7 @@ define void @v_shuffle_v2i64_v2i64__3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -204,10 +202,8 @@ define void @v_shuffle_v2i64_v2i64__3_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -262,8 +258,7 @@ define void @v_shuffle_v2i64_v2i64__3_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -309,8 +304,7 @@ define void @v_shuffle_v2i64_v2i64__3_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -355,8 +349,7 @@ define void @v_shuffle_v2i64_v2i64__3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -401,8 +394,7 @@ define void @v_shuffle_v2i64_v2i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -446,8 +438,7 @@ define void @v_shuffle_v2i64_v2i64__0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -492,8 +483,7 @@ define void @v_shuffle_v2i64_v2i64__1_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -537,8 +527,7 @@ define void @v_shuffle_v2i64_v2i64__2_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -660,8 +649,7 @@ define void @v_shuffle_v2i64_v2i64__1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -794,8 +782,7 @@ define void @v_shuffle_v2i64_v2i64__1_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -900,8 +887,7 @@ define void @v_shuffle_v2i64_v2i64__0_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -956,8 +942,7 @@ define void @v_shuffle_v2i64_v2i64__1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1095,8 +1080,7 @@ define void @s_shuffle_v2i64_v2i64__1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1154,8 +1138,7 @@ define void @s_shuffle_v2i64_v2i64__3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1213,10 +1196,8 @@ define void @s_shuffle_v2i64_v2i64__3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1270,8 +1251,7 @@ define void @s_shuffle_v2i64_v2i64__3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1320,10 +1300,8 @@ define void @s_shuffle_v2i64_v2i64__3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1336,18 +1314,43 @@ define void @s_shuffle_v2i64_v2i64__3_2() {
}
define void @s_shuffle_v2i64_v2i64__3_3() {
-; GFX9-LABEL: s_shuffle_v2i64_v2i64__3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v2i64__3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v2i64__3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v2i64__3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> <i32 3, i32 3>
@@ -1388,8 +1391,7 @@ define void @s_shuffle_v2i64_v2i64__u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1401,18 +1403,43 @@ define void @s_shuffle_v2i64_v2i64__u_0() {
}
define void @s_shuffle_v2i64_v2i64__0_0() {
-; GFX9-LABEL: s_shuffle_v2i64_v2i64__0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v2i64__0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v2i64__0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v2i64__0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -1456,10 +1483,8 @@ define void @s_shuffle_v2i64_v2i64__1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1503,8 +1528,7 @@ define void @s_shuffle_v2i64_v2i64__2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1596,18 +1620,43 @@ define void @s_shuffle_v2i64_v2i64__0_1() {
}
define void @s_shuffle_v2i64_v2i64__1_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v2i64__1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v2i64__1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v2i64__1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v2i64__1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> <i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -1741,8 +1790,7 @@ define void @s_shuffle_v2i64_v2i64__1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1850,8 +1898,7 @@ define void @s_shuffle_v2i64_v2i64__0_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1905,8 +1952,7 @@ define void @s_shuffle_v2i64_v2i64__1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll
index bc8a56a30d8f9..f54d45b1367cc 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll
@@ -127,8 +127,7 @@ define void @v_shuffle_v2i64_v3i64__2_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -223,8 +222,7 @@ define void @v_shuffle_v2i64_v3i64__5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -283,10 +281,8 @@ define void @v_shuffle_v2i64_v3i64__5_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -341,8 +337,7 @@ define void @v_shuffle_v2i64_v3i64__5_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -397,8 +392,7 @@ define void @v_shuffle_v2i64_v3i64__5_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -447,10 +441,8 @@ define void @v_shuffle_v2i64_v3i64__5_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -495,8 +487,7 @@ define void @v_shuffle_v2i64_v3i64__5_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -541,8 +532,7 @@ define void @v_shuffle_v2i64_v3i64__5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -587,8 +577,7 @@ define void @v_shuffle_v2i64_v3i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -632,8 +621,7 @@ define void @v_shuffle_v2i64_v3i64__0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -677,8 +665,7 @@ define void @v_shuffle_v2i64_v3i64__1_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -726,10 +713,8 @@ define void @v_shuffle_v2i64_v3i64__2_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -773,8 +758,7 @@ define void @v_shuffle_v2i64_v3i64__3_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -828,8 +812,7 @@ define void @v_shuffle_v2i64_v3i64__4_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -952,8 +935,7 @@ define void @v_shuffle_v2i64_v3i64__1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -997,8 +979,7 @@ define void @v_shuffle_v2i64_v3i64__2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1091,8 +1072,7 @@ define void @v_shuffle_v2i64_v3i64__4_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1176,8 +1156,7 @@ define void @v_shuffle_v2i64_v3i64__0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1260,8 +1239,7 @@ define void @v_shuffle_v2i64_v3i64__2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1354,8 +1332,7 @@ define void @v_shuffle_v2i64_v3i64__4_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1489,8 +1466,7 @@ define void @v_shuffle_v2i64_v3i64__2_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1545,8 +1521,7 @@ define void @v_shuffle_v2i64_v3i64__4_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1641,8 +1616,7 @@ define void @v_shuffle_v2i64_v3i64__0_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1697,8 +1671,7 @@ define void @v_shuffle_v2i64_v3i64__1_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1753,8 +1726,7 @@ define void @v_shuffle_v2i64_v3i64__2_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1839,8 +1811,7 @@ define void @v_shuffle_v2i64_v3i64__4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1935,8 +1906,7 @@ define void @v_shuffle_v2i64_v3i64__0_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1991,8 +1961,7 @@ define void @v_shuffle_v2i64_v3i64__1_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2047,8 +2016,7 @@ define void @v_shuffle_v2i64_v3i64__2_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2093,8 +2061,7 @@ define void @v_shuffle_v2i64_v3i64__3_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2232,8 +2199,7 @@ define void @s_shuffle_v2i64_v3i64__1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2273,8 +2239,7 @@ define void @s_shuffle_v2i64_v3i64__2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2332,8 +2297,7 @@ define void @s_shuffle_v2i64_v3i64__4_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2374,8 +2338,7 @@ define void @s_shuffle_v2i64_v3i64__5_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2430,11 +2393,11 @@ define void @s_shuffle_v2i64_v3i64__5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2484,8 +2447,7 @@ define void @s_shuffle_v2i64_v3i64__5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2539,10 +2501,8 @@ define void @s_shuffle_v2i64_v3i64__5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2587,10 +2547,8 @@ define void @s_shuffle_v2i64_v3i64__5_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2603,18 +2561,43 @@ define void @s_shuffle_v2i64_v3i64__5_3() {
}
define void @s_shuffle_v2i64_v3i64__5_4() {
-; GFX9-LABEL: s_shuffle_v2i64_v3i64__5_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v3i64__5_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v3i64__5_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v3i64__5_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> <i32 5, i32 4>
@@ -2659,10 +2642,8 @@ define void @s_shuffle_v2i64_v3i64__5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2707,8 +2688,7 @@ define void @s_shuffle_v2i64_v3i64__u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2720,18 +2700,43 @@ define void @s_shuffle_v2i64_v3i64__u_0() {
}
define void @s_shuffle_v2i64_v3i64__0_0() {
-; GFX9-LABEL: s_shuffle_v2i64_v3i64__0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v3i64__0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v3i64__0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v3i64__0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -2775,10 +2780,8 @@ define void @s_shuffle_v2i64_v3i64__1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2822,10 +2825,8 @@ define void @s_shuffle_v2i64_v3i64__2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2869,8 +2870,7 @@ define void @s_shuffle_v2i64_v3i64__3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2924,13 +2924,11 @@ define void @s_shuffle_v2i64_v3i64__4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3023,18 +3021,43 @@ define void @s_shuffle_v2i64_v3i64__0_1() {
}
define void @s_shuffle_v2i64_v3i64__1_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v3i64__1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v3i64__1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v3i64__1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v3i64__1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> <i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -3042,18 +3065,43 @@ define void @s_shuffle_v2i64_v3i64__1_1() {
}
define void @s_shuffle_v2i64_v3i64__2_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v3i64__2_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v3i64__2_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v3i64__2_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v3i64__2_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> <i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -3142,8 +3190,7 @@ define void @s_shuffle_v2i64_v3i64__4_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3188,8 +3235,7 @@ define void @s_shuffle_v2i64_v3i64__u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3201,18 +3247,43 @@ define void @s_shuffle_v2i64_v3i64__u_2() {
}
define void @s_shuffle_v2i64_v3i64__0_2() {
-; GFX9-LABEL: s_shuffle_v2i64_v3i64__0_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v3i64__0_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v3i64__0_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v3i64__0_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> <i32 0, i32 2>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -3256,10 +3327,8 @@ define void @s_shuffle_v2i64_v3i64__1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3307,10 +3376,8 @@ define void @s_shuffle_v2i64_v3i64__2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3354,8 +3421,7 @@ define void @s_shuffle_v2i64_v3i64__3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3412,10 +3478,8 @@ define void @s_shuffle_v2i64_v3i64__4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3514,8 +3578,7 @@ define void @s_shuffle_v2i64_v3i64__1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3555,8 +3618,7 @@ define void @s_shuffle_v2i64_v3i64__2_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3618,10 +3680,8 @@ define void @s_shuffle_v2i64_v3i64__4_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3716,8 +3776,7 @@ define void @s_shuffle_v2i64_v3i64__0_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3771,8 +3830,7 @@ define void @s_shuffle_v2i64_v3i64__1_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3826,8 +3884,7 @@ define void @s_shuffle_v2i64_v3i64__2_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3881,18 +3938,43 @@ define void @s_shuffle_v2i64_v3i64__3_4() {
}
define void @s_shuffle_v2i64_v3i64__4_4() {
-; GFX9-LABEL: s_shuffle_v2i64_v3i64__4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v3i64__4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v3i64__4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v3i64__4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> <i32 4, i32 4>
@@ -3933,8 +4015,7 @@ define void @s_shuffle_v2i64_v3i64__u_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3988,8 +4069,7 @@ define void @s_shuffle_v2i64_v3i64__0_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4047,10 +4127,8 @@ define void @s_shuffle_v2i64_v3i64__1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4104,10 +4182,8 @@ define void @s_shuffle_v2i64_v3i64__2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4120,18 +4196,43 @@ define void @s_shuffle_v2i64_v3i64__2_5() {
}
define void @s_shuffle_v2i64_v3i64__3_5() {
-; GFX9-LABEL: s_shuffle_v2i64_v3i64__3_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v3i64__3_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v3i64__3_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v3i64__3_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> <i32 3, i32 5>
@@ -4176,10 +4277,8 @@ define void @s_shuffle_v2i64_v3i64__4_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll
index dd42a1dd44320..1c738b8f4f1d9 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll
@@ -166,8 +166,7 @@ define void @v_shuffle_v2i64_v4i64__3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -302,8 +301,7 @@ define void @v_shuffle_v2i64_v4i64__7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -362,10 +360,8 @@ define void @v_shuffle_v2i64_v4i64__7_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -420,8 +416,7 @@ define void @v_shuffle_v2i64_v4i64__7_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -476,8 +471,7 @@ define void @v_shuffle_v2i64_v4i64__7_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -532,8 +526,7 @@ define void @v_shuffle_v2i64_v4i64__7_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -582,10 +575,8 @@ define void @v_shuffle_v2i64_v4i64__7_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -630,8 +621,7 @@ define void @v_shuffle_v2i64_v4i64__7_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -676,8 +666,7 @@ define void @v_shuffle_v2i64_v4i64__7_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -722,8 +711,7 @@ define void @v_shuffle_v2i64_v4i64__7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -768,8 +756,7 @@ define void @v_shuffle_v2i64_v4i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -813,8 +800,7 @@ define void @v_shuffle_v2i64_v4i64__0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -858,8 +844,7 @@ define void @v_shuffle_v2i64_v4i64__1_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -903,8 +888,7 @@ define void @v_shuffle_v2i64_v4i64__2_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -952,10 +936,8 @@ define void @v_shuffle_v2i64_v4i64__3_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -999,8 +981,7 @@ define void @v_shuffle_v2i64_v4i64__4_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1054,8 +1035,7 @@ define void @v_shuffle_v2i64_v4i64__5_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1110,8 +1090,7 @@ define void @v_shuffle_v2i64_v4i64__6_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1234,8 +1213,7 @@ define void @v_shuffle_v2i64_v4i64__1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1279,8 +1257,7 @@ define void @v_shuffle_v2i64_v4i64__2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1324,8 +1301,7 @@ define void @v_shuffle_v2i64_v4i64__3_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1418,8 +1394,7 @@ define void @v_shuffle_v2i64_v4i64__5_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1474,8 +1449,7 @@ define void @v_shuffle_v2i64_v4i64__6_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1559,8 +1533,7 @@ define void @v_shuffle_v2i64_v4i64__0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1643,8 +1616,7 @@ define void @v_shuffle_v2i64_v4i64__2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1688,8 +1660,7 @@ define void @v_shuffle_v2i64_v4i64__3_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1782,8 +1753,7 @@ define void @v_shuffle_v2i64_v4i64__5_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1838,8 +1808,7 @@ define void @v_shuffle_v2i64_v4i64__6_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v4
-; GFX942-NEXT: v_mov_b32_e32 v13, v5
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1923,8 +1892,7 @@ define void @v_shuffle_v2i64_v4i64__0_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1968,8 +1936,7 @@ define void @v_shuffle_v2i64_v4i64__1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2052,8 +2019,7 @@ define void @v_shuffle_v2i64_v4i64__3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2146,8 +2112,7 @@ define void @v_shuffle_v2i64_v4i64__5_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2202,8 +2167,7 @@ define void @v_shuffle_v2i64_v4i64__6_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v6
-; GFX942-NEXT: v_mov_b32_e32 v15, v7
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2376,8 +2340,7 @@ define void @v_shuffle_v2i64_v4i64__3_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2432,8 +2395,7 @@ define void @v_shuffle_v2i64_v4i64__5_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2478,8 +2440,7 @@ define void @v_shuffle_v2i64_v4i64__6_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2574,8 +2535,7 @@ define void @v_shuffle_v2i64_v4i64__0_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2630,8 +2590,7 @@ define void @v_shuffle_v2i64_v4i64__1_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2686,8 +2645,7 @@ define void @v_shuffle_v2i64_v4i64__2_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2742,8 +2700,7 @@ define void @v_shuffle_v2i64_v4i64__3_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2828,8 +2785,7 @@ define void @v_shuffle_v2i64_v4i64__5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2874,8 +2830,7 @@ define void @v_shuffle_v2i64_v4i64__6_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2970,8 +2925,7 @@ define void @v_shuffle_v2i64_v4i64__0_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3026,8 +2980,7 @@ define void @v_shuffle_v2i64_v4i64__1_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3082,8 +3035,7 @@ define void @v_shuffle_v2i64_v4i64__2_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3138,8 +3090,7 @@ define void @v_shuffle_v2i64_v4i64__3_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v6
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3184,8 +3135,7 @@ define void @v_shuffle_v2i64_v4i64__4_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3270,8 +3220,7 @@ define void @v_shuffle_v2i64_v4i64__6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3366,8 +3315,7 @@ define void @v_shuffle_v2i64_v4i64__0_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3422,8 +3370,7 @@ define void @v_shuffle_v2i64_v4i64__1_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v10
-; GFX942-NEXT: v_mov_b32_e32 v5, v11
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3478,8 +3425,7 @@ define void @v_shuffle_v2i64_v4i64__2_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v12
-; GFX942-NEXT: v_mov_b32_e32 v7, v13
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3534,8 +3480,7 @@ define void @v_shuffle_v2i64_v4i64__3_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3580,8 +3525,7 @@ define void @v_shuffle_v2i64_v4i64__4_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3626,8 +3570,7 @@ define void @v_shuffle_v2i64_v4i64__5_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3765,8 +3708,7 @@ define void @s_shuffle_v2i64_v4i64__1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3850,8 +3792,7 @@ define void @s_shuffle_v2i64_v4i64__3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3909,8 +3850,7 @@ define void @s_shuffle_v2i64_v4i64__5_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3996,8 +3936,7 @@ define void @s_shuffle_v2i64_v4i64__7_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4056,10 +3995,8 @@ define void @s_shuffle_v2i64_v4i64__7_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4113,8 +4050,7 @@ define void @s_shuffle_v2i64_v4i64__7_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4172,10 +4108,8 @@ define void @s_shuffle_v2i64_v4i64__7_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4230,8 +4164,7 @@ define void @s_shuffle_v2i64_v4i64__7_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4280,10 +4213,8 @@ define void @s_shuffle_v2i64_v4i64__7_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4296,18 +4227,43 @@ define void @s_shuffle_v2i64_v4i64__7_4() {
}
define void @s_shuffle_v2i64_v4i64__7_5() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__7_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__7_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__7_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__7_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> <i32 7, i32 5>
@@ -4352,10 +4308,8 @@ define void @s_shuffle_v2i64_v4i64__7_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4368,18 +4322,43 @@ define void @s_shuffle_v2i64_v4i64__7_6() {
}
define void @s_shuffle_v2i64_v4i64__7_7() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> <i32 7, i32 7>
@@ -4420,8 +4399,7 @@ define void @s_shuffle_v2i64_v4i64__u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4433,18 +4411,43 @@ define void @s_shuffle_v2i64_v4i64__u_0() {
}
define void @s_shuffle_v2i64_v4i64__0_0() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -4488,10 +4491,8 @@ define void @s_shuffle_v2i64_v4i64__1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4503,18 +4504,43 @@ define void @s_shuffle_v2i64_v4i64__1_0() {
}
define void @s_shuffle_v2i64_v4i64__2_0() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__2_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s4
-; GFX9-NEXT: s_mov_b32 s11, s5
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__2_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__2_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__2_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> <i32 2, i32 0>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -4558,10 +4584,8 @@ define void @s_shuffle_v2i64_v4i64__3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4605,8 +4629,7 @@ define void @s_shuffle_v2i64_v4i64__4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4664,10 +4687,8 @@ define void @s_shuffle_v2i64_v4i64__5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4722,8 +4743,7 @@ define void @s_shuffle_v2i64_v4i64__6_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4816,18 +4836,43 @@ define void @s_shuffle_v2i64_v4i64__0_1() {
}
define void @s_shuffle_v2i64_v4i64__1_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> <i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -4835,18 +4880,43 @@ define void @s_shuffle_v2i64_v4i64__1_1() {
}
define void @s_shuffle_v2i64_v4i64__2_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__2_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s6
-; GFX9-NEXT: s_mov_b32 s11, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__2_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__2_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__2_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> <i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -4854,18 +4924,43 @@ define void @s_shuffle_v2i64_v4i64__2_1() {
}
define void @s_shuffle_v2i64_v4i64__3_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__3_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__3_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__3_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__3_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> <i32 3, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -4954,8 +5049,7 @@ define void @s_shuffle_v2i64_v4i64__5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5010,8 +5104,7 @@ define void @s_shuffle_v2i64_v4i64__6_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5056,8 +5149,7 @@ define void @s_shuffle_v2i64_v4i64__u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5069,18 +5161,43 @@ define void @s_shuffle_v2i64_v4i64__u_2() {
}
define void @s_shuffle_v2i64_v4i64__0_2() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__0_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__0_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__0_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__0_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> <i32 0, i32 2>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -5124,10 +5241,8 @@ define void @s_shuffle_v2i64_v4i64__1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5139,18 +5254,43 @@ define void @s_shuffle_v2i64_v4i64__1_2() {
}
define void @s_shuffle_v2i64_v4i64__2_2() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> <i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -5194,10 +5334,8 @@ define void @s_shuffle_v2i64_v4i64__3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5241,8 +5379,7 @@ define void @s_shuffle_v2i64_v4i64__4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5299,10 +5436,8 @@ define void @s_shuffle_v2i64_v4i64__5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5357,8 +5492,7 @@ define void @s_shuffle_v2i64_v4i64__6_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5411,18 +5545,43 @@ define void @s_shuffle_v2i64_v4i64__u_3() {
}
define void @s_shuffle_v2i64_v4i64__0_3() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__0_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__0_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__0_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__0_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> <i32 0, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -5430,18 +5589,43 @@ define void @s_shuffle_v2i64_v4i64__0_3() {
}
define void @s_shuffle_v2i64_v4i64__1_3() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__1_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s6
-; GFX9-NEXT: s_mov_b32 s9, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__1_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__1_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__1_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> <i32 1, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -5489,18 +5673,43 @@ define void @s_shuffle_v2i64_v4i64__2_3() {
}
define void @s_shuffle_v2i64_v4i64__3_3() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> <i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -5590,8 +5799,7 @@ define void @s_shuffle_v2i64_v4i64__5_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5646,8 +5854,7 @@ define void @s_shuffle_v2i64_v4i64__6_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5746,8 +5953,7 @@ define void @s_shuffle_v2i64_v4i64__1_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5831,8 +6037,7 @@ define void @s_shuffle_v2i64_v4i64__3_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5894,10 +6099,8 @@ define void @s_shuffle_v2i64_v4i64__5_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5910,18 +6113,43 @@ define void @s_shuffle_v2i64_v4i64__5_4() {
}
define void @s_shuffle_v2i64_v4i64__6_4() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__6_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s4
-; GFX9-NEXT: s_mov_b32 s11, s5
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__6_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__6_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__6_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> <i32 6, i32 4>
@@ -6012,8 +6240,7 @@ define void @s_shuffle_v2i64_v4i64__0_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6067,8 +6294,7 @@ define void @s_shuffle_v2i64_v4i64__1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6123,8 +6349,7 @@ define void @s_shuffle_v2i64_v4i64__2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6178,8 +6403,7 @@ define void @s_shuffle_v2i64_v4i64__3_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6233,18 +6457,43 @@ define void @s_shuffle_v2i64_v4i64__4_5() {
}
define void @s_shuffle_v2i64_v4i64__5_5() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> <i32 5, i32 5>
@@ -6253,18 +6502,43 @@ define void @s_shuffle_v2i64_v4i64__5_5() {
}
define void @s_shuffle_v2i64_v4i64__6_5() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__6_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s6
-; GFX9-NEXT: s_mov_b32 s11, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__6_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__6_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__6_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> <i32 6, i32 5>
@@ -6305,8 +6579,7 @@ define void @s_shuffle_v2i64_v4i64__u_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6360,8 +6633,7 @@ define void @s_shuffle_v2i64_v4i64__0_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6419,10 +6691,8 @@ define void @s_shuffle_v2i64_v4i64__1_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6477,8 +6747,7 @@ define void @s_shuffle_v2i64_v4i64__2_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6536,10 +6805,8 @@ define void @s_shuffle_v2i64_v4i64__3_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6552,18 +6819,43 @@ define void @s_shuffle_v2i64_v4i64__3_6() {
}
define void @s_shuffle_v2i64_v4i64__4_6() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__4_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__4_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__4_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__4_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> <i32 4, i32 6>
@@ -6608,10 +6900,8 @@ define void @s_shuffle_v2i64_v4i64__5_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6624,18 +6914,43 @@ define void @s_shuffle_v2i64_v4i64__5_6() {
}
define void @s_shuffle_v2i64_v4i64__6_6() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__6_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> <i32 6, i32 6>
@@ -6726,8 +7041,7 @@ define void @s_shuffle_v2i64_v4i64__0_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6782,8 +7096,7 @@ define void @s_shuffle_v2i64_v4i64__1_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6838,8 +7151,7 @@ define void @s_shuffle_v2i64_v4i64__2_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6894,8 +7206,7 @@ define void @s_shuffle_v2i64_v4i64__3_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6908,18 +7219,43 @@ define void @s_shuffle_v2i64_v4i64__3_7() {
}
define void @s_shuffle_v2i64_v4i64__4_7() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__4_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__4_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__4_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__4_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> <i32 4, i32 7>
@@ -6928,18 +7264,43 @@ define void @s_shuffle_v2i64_v4i64__4_7() {
}
define void @s_shuffle_v2i64_v4i64__5_7() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__5_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s6
-; GFX9-NEXT: s_mov_b32 s9, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__5_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__5_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__5_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> <i32 5, i32 7>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll
index 7ee7c83e0122d..c8aac3a841c69 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll
@@ -322,8 +322,7 @@ define void @v_shuffle_v2i64_v8i64__7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -618,8 +617,7 @@ define void @v_shuffle_v2i64_v8i64__15_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -678,10 +676,8 @@ define void @v_shuffle_v2i64_v8i64__15_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v16
-; GFX942-NEXT: v_mov_b32_e32 v3, v17
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[16:17]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v18, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -736,8 +732,7 @@ define void @v_shuffle_v2i64_v8i64__15_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v18
-; GFX942-NEXT: v_mov_b32_e32 v1, v19
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[18:19]
; GFX942-NEXT: global_store_dwordx4 v20, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -792,8 +787,7 @@ define void @v_shuffle_v2i64_v8i64__15_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v20
-; GFX942-NEXT: v_mov_b32_e32 v3, v21
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[20:21]
; GFX942-NEXT: global_store_dwordx4 v22, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -848,8 +842,7 @@ define void @v_shuffle_v2i64_v8i64__15_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v22
-; GFX942-NEXT: v_mov_b32_e32 v5, v23
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[22:23]
; GFX942-NEXT: global_store_dwordx4 v24, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -904,8 +897,7 @@ define void @v_shuffle_v2i64_v8i64__15_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v24
-; GFX942-NEXT: v_mov_b32_e32 v7, v25
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[24:25]
; GFX942-NEXT: global_store_dwordx4 v26, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -960,8 +952,7 @@ define void @v_shuffle_v2i64_v8i64__15_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v26
-; GFX942-NEXT: v_mov_b32_e32 v9, v27
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[26:27]
; GFX942-NEXT: global_store_dwordx4 v28, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1016,8 +1007,7 @@ define void @v_shuffle_v2i64_v8i64__15_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v28
-; GFX942-NEXT: v_mov_b32_e32 v11, v29
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[28:29]
; GFX942-NEXT: global_store_dwordx4 v30, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1072,8 +1062,7 @@ define void @v_shuffle_v2i64_v8i64__15_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[16:31]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v30
-; GFX942-NEXT: v_mov_b32_e32 v13, v31
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[30:31]
; GFX942-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1122,10 +1111,8 @@ define void @v_shuffle_v2i64_v8i64__15_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1170,8 +1157,7 @@ define void @v_shuffle_v2i64_v8i64__15_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1216,8 +1202,7 @@ define void @v_shuffle_v2i64_v8i64__15_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1262,8 +1247,7 @@ define void @v_shuffle_v2i64_v8i64__15_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1308,8 +1292,7 @@ define void @v_shuffle_v2i64_v8i64__15_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v14
-; GFX942-NEXT: v_mov_b32_e32 v7, v15
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1354,8 +1337,7 @@ define void @v_shuffle_v2i64_v8i64__15_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v14
-; GFX942-NEXT: v_mov_b32_e32 v9, v15
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1400,8 +1382,7 @@ define void @v_shuffle_v2i64_v8i64__15_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v14
-; GFX942-NEXT: v_mov_b32_e32 v11, v15
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1446,8 +1427,7 @@ define void @v_shuffle_v2i64_v8i64__15_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1492,8 +1472,7 @@ define void @v_shuffle_v2i64_v8i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1537,8 +1516,7 @@ define void @v_shuffle_v2i64_v8i64__0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1582,8 +1560,7 @@ define void @v_shuffle_v2i64_v8i64__1_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1627,8 +1604,7 @@ define void @v_shuffle_v2i64_v8i64__2_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1672,8 +1648,7 @@ define void @v_shuffle_v2i64_v8i64__3_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1717,8 +1692,7 @@ define void @v_shuffle_v2i64_v8i64__4_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1762,8 +1736,7 @@ define void @v_shuffle_v2i64_v8i64__5_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v0
-; GFX942-NEXT: v_mov_b32_e32 v13, v1
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1807,8 +1780,7 @@ define void @v_shuffle_v2i64_v8i64__6_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v0
-; GFX942-NEXT: v_mov_b32_e32 v15, v1
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1856,10 +1828,8 @@ define void @v_shuffle_v2i64_v8i64__7_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1903,8 +1873,7 @@ define void @v_shuffle_v2i64_v8i64__8_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1958,8 +1927,7 @@ define void @v_shuffle_v2i64_v8i64__9_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v18, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2014,8 +1982,7 @@ define void @v_shuffle_v2i64_v8i64__10_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2070,8 +2037,7 @@ define void @v_shuffle_v2i64_v8i64__11_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v18, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2126,8 +2092,7 @@ define void @v_shuffle_v2i64_v8i64__12_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v0
-; GFX942-NEXT: v_mov_b32_e32 v13, v1
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v18, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2182,8 +2147,7 @@ define void @v_shuffle_v2i64_v8i64__13_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v0
-; GFX942-NEXT: v_mov_b32_e32 v15, v1
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v18, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2238,8 +2202,7 @@ define void @v_shuffle_v2i64_v8i64__14_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v16, v0
-; GFX942-NEXT: v_mov_b32_e32 v17, v1
+; GFX942-NEXT: v_mov_b64_e32 v[16:17], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2362,8 +2325,7 @@ define void @v_shuffle_v2i64_v8i64__1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2407,8 +2369,7 @@ define void @v_shuffle_v2i64_v8i64__2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2452,8 +2413,7 @@ define void @v_shuffle_v2i64_v8i64__3_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2497,8 +2457,7 @@ define void @v_shuffle_v2i64_v8i64__4_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2542,8 +2501,7 @@ define void @v_shuffle_v2i64_v8i64__5_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v2
-; GFX942-NEXT: v_mov_b32_e32 v13, v3
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2587,8 +2545,7 @@ define void @v_shuffle_v2i64_v8i64__6_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v2
-; GFX942-NEXT: v_mov_b32_e32 v15, v3
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2632,8 +2589,7 @@ define void @v_shuffle_v2i64_v8i64__7_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2726,8 +2682,7 @@ define void @v_shuffle_v2i64_v8i64__9_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v20, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2782,8 +2737,7 @@ define void @v_shuffle_v2i64_v8i64__10_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v20, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2838,8 +2792,7 @@ define void @v_shuffle_v2i64_v8i64__11_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v2
-; GFX942-NEXT: v_mov_b32_e32 v13, v3
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v20, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2894,8 +2847,7 @@ define void @v_shuffle_v2i64_v8i64__12_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v2
-; GFX942-NEXT: v_mov_b32_e32 v15, v3
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v20, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2950,8 +2902,7 @@ define void @v_shuffle_v2i64_v8i64__13_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v16, v2
-; GFX942-NEXT: v_mov_b32_e32 v17, v3
+; GFX942-NEXT: v_mov_b64_e32 v[16:17], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v20, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3006,8 +2957,7 @@ define void @v_shuffle_v2i64_v8i64__14_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v18, v2
-; GFX942-NEXT: v_mov_b32_e32 v19, v3
+; GFX942-NEXT: v_mov_b64_e32 v[18:19], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v20, v[16:19], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3091,8 +3041,7 @@ define void @v_shuffle_v2i64_v8i64__0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3175,8 +3124,7 @@ define void @v_shuffle_v2i64_v8i64__2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3220,8 +3168,7 @@ define void @v_shuffle_v2i64_v8i64__3_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3265,8 +3212,7 @@ define void @v_shuffle_v2i64_v8i64__4_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3310,8 +3256,7 @@ define void @v_shuffle_v2i64_v8i64__5_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v4
-; GFX942-NEXT: v_mov_b32_e32 v13, v5
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3355,8 +3300,7 @@ define void @v_shuffle_v2i64_v8i64__6_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v4
-; GFX942-NEXT: v_mov_b32_e32 v15, v5
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3400,8 +3344,7 @@ define void @v_shuffle_v2i64_v8i64__7_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3494,8 +3437,7 @@ define void @v_shuffle_v2i64_v8i64__9_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v22, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3550,8 +3492,7 @@ define void @v_shuffle_v2i64_v8i64__10_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v4
-; GFX942-NEXT: v_mov_b32_e32 v13, v5
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v22, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3606,8 +3547,7 @@ define void @v_shuffle_v2i64_v8i64__11_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v4
-; GFX942-NEXT: v_mov_b32_e32 v15, v5
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v22, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3662,8 +3602,7 @@ define void @v_shuffle_v2i64_v8i64__12_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v16, v4
-; GFX942-NEXT: v_mov_b32_e32 v17, v5
+; GFX942-NEXT: v_mov_b64_e32 v[16:17], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v22, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3718,8 +3657,7 @@ define void @v_shuffle_v2i64_v8i64__13_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v18, v4
-; GFX942-NEXT: v_mov_b32_e32 v19, v5
+; GFX942-NEXT: v_mov_b64_e32 v[18:19], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v22, v[16:19], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3774,8 +3712,7 @@ define void @v_shuffle_v2i64_v8i64__14_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v20, v4
-; GFX942-NEXT: v_mov_b32_e32 v21, v5
+; GFX942-NEXT: v_mov_b64_e32 v[20:21], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v22, v[18:21], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3859,8 +3796,7 @@ define void @v_shuffle_v2i64_v8i64__0_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3904,8 +3840,7 @@ define void @v_shuffle_v2i64_v8i64__1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3988,8 +3923,7 @@ define void @v_shuffle_v2i64_v8i64__3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4033,8 +3967,7 @@ define void @v_shuffle_v2i64_v8i64__4_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v6
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4078,8 +4011,7 @@ define void @v_shuffle_v2i64_v8i64__5_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4123,8 +4055,7 @@ define void @v_shuffle_v2i64_v8i64__6_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v6
-; GFX942-NEXT: v_mov_b32_e32 v15, v7
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4168,8 +4099,7 @@ define void @v_shuffle_v2i64_v8i64__7_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4262,8 +4192,7 @@ define void @v_shuffle_v2i64_v8i64__9_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v24, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4318,8 +4247,7 @@ define void @v_shuffle_v2i64_v8i64__10_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v6
-; GFX942-NEXT: v_mov_b32_e32 v15, v7
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4374,8 +4302,7 @@ define void @v_shuffle_v2i64_v8i64__11_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v16, v6
-; GFX942-NEXT: v_mov_b32_e32 v17, v7
+; GFX942-NEXT: v_mov_b64_e32 v[16:17], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v24, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4430,8 +4357,7 @@ define void @v_shuffle_v2i64_v8i64__12_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v18, v6
-; GFX942-NEXT: v_mov_b32_e32 v19, v7
+; GFX942-NEXT: v_mov_b64_e32 v[18:19], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v24, v[16:19], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4486,8 +4412,7 @@ define void @v_shuffle_v2i64_v8i64__13_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v20, v6
-; GFX942-NEXT: v_mov_b32_e32 v21, v7
+; GFX942-NEXT: v_mov_b64_e32 v[20:21], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v24, v[18:21], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4542,8 +4467,7 @@ define void @v_shuffle_v2i64_v8i64__14_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v22, v6
-; GFX942-NEXT: v_mov_b32_e32 v23, v7
+; GFX942-NEXT: v_mov_b64_e32 v[22:23], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v24, v[20:23], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4627,8 +4551,7 @@ define void @v_shuffle_v2i64_v8i64__0_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4672,8 +4595,7 @@ define void @v_shuffle_v2i64_v8i64__1_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4717,8 +4639,7 @@ define void @v_shuffle_v2i64_v8i64__2_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4801,8 +4722,7 @@ define void @v_shuffle_v2i64_v8i64__4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v8
-; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4846,8 +4766,7 @@ define void @v_shuffle_v2i64_v8i64__5_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v8
-; GFX942-NEXT: v_mov_b32_e32 v13, v9
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4891,8 +4810,7 @@ define void @v_shuffle_v2i64_v8i64__6_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v8
-; GFX942-NEXT: v_mov_b32_e32 v15, v9
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4936,8 +4854,7 @@ define void @v_shuffle_v2i64_v8i64__7_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v14
-; GFX942-NEXT: v_mov_b32_e32 v7, v15
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5030,8 +4947,7 @@ define void @v_shuffle_v2i64_v8i64__9_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v8
-; GFX942-NEXT: v_mov_b32_e32 v15, v9
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v26, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5086,8 +5002,7 @@ define void @v_shuffle_v2i64_v8i64__10_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v16, v8
-; GFX942-NEXT: v_mov_b32_e32 v17, v9
+; GFX942-NEXT: v_mov_b64_e32 v[16:17], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v26, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5142,8 +5057,7 @@ define void @v_shuffle_v2i64_v8i64__11_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v18, v8
-; GFX942-NEXT: v_mov_b32_e32 v19, v9
+; GFX942-NEXT: v_mov_b64_e32 v[18:19], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v26, v[16:19], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5198,8 +5112,7 @@ define void @v_shuffle_v2i64_v8i64__12_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v20, v8
-; GFX942-NEXT: v_mov_b32_e32 v21, v9
+; GFX942-NEXT: v_mov_b64_e32 v[20:21], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v26, v[18:21], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5254,8 +5167,7 @@ define void @v_shuffle_v2i64_v8i64__13_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v22, v8
-; GFX942-NEXT: v_mov_b32_e32 v23, v9
+; GFX942-NEXT: v_mov_b64_e32 v[22:23], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v26, v[20:23], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5310,8 +5222,7 @@ define void @v_shuffle_v2i64_v8i64__14_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v24, v8
-; GFX942-NEXT: v_mov_b32_e32 v25, v9
+; GFX942-NEXT: v_mov_b64_e32 v[24:25], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v26, v[22:25], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5395,8 +5306,7 @@ define void @v_shuffle_v2i64_v8i64__0_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5440,8 +5350,7 @@ define void @v_shuffle_v2i64_v8i64__1_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v10
-; GFX942-NEXT: v_mov_b32_e32 v5, v11
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5485,8 +5394,7 @@ define void @v_shuffle_v2i64_v8i64__2_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5530,8 +5438,7 @@ define void @v_shuffle_v2i64_v8i64__3_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5614,8 +5521,7 @@ define void @v_shuffle_v2i64_v8i64__5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v10
-; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5659,8 +5565,7 @@ define void @v_shuffle_v2i64_v8i64__6_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v10
-; GFX942-NEXT: v_mov_b32_e32 v15, v11
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5704,8 +5609,7 @@ define void @v_shuffle_v2i64_v8i64__7_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v14
-; GFX942-NEXT: v_mov_b32_e32 v9, v15
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5798,8 +5702,7 @@ define void @v_shuffle_v2i64_v8i64__9_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v16, v10
-; GFX942-NEXT: v_mov_b32_e32 v17, v11
+; GFX942-NEXT: v_mov_b64_e32 v[16:17], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v28, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5854,8 +5757,7 @@ define void @v_shuffle_v2i64_v8i64__10_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v18, v10
-; GFX942-NEXT: v_mov_b32_e32 v19, v11
+; GFX942-NEXT: v_mov_b64_e32 v[18:19], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v28, v[16:19], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5910,8 +5812,7 @@ define void @v_shuffle_v2i64_v8i64__11_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v20, v10
-; GFX942-NEXT: v_mov_b32_e32 v21, v11
+; GFX942-NEXT: v_mov_b64_e32 v[20:21], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v28, v[18:21], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5966,8 +5867,7 @@ define void @v_shuffle_v2i64_v8i64__12_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v22, v10
-; GFX942-NEXT: v_mov_b32_e32 v23, v11
+; GFX942-NEXT: v_mov_b64_e32 v[22:23], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v28, v[20:23], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6022,8 +5922,7 @@ define void @v_shuffle_v2i64_v8i64__13_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v24, v10
-; GFX942-NEXT: v_mov_b32_e32 v25, v11
+; GFX942-NEXT: v_mov_b64_e32 v[24:25], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v28, v[22:25], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6078,8 +5977,7 @@ define void @v_shuffle_v2i64_v8i64__14_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v26, v10
-; GFX942-NEXT: v_mov_b32_e32 v27, v11
+; GFX942-NEXT: v_mov_b64_e32 v[26:27], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v28, v[24:27], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6163,8 +6061,7 @@ define void @v_shuffle_v2i64_v8i64__0_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6208,8 +6105,7 @@ define void @v_shuffle_v2i64_v8i64__1_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v12
-; GFX942-NEXT: v_mov_b32_e32 v5, v13
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6253,8 +6149,7 @@ define void @v_shuffle_v2i64_v8i64__2_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v12
-; GFX942-NEXT: v_mov_b32_e32 v7, v13
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6298,8 +6193,7 @@ define void @v_shuffle_v2i64_v8i64__3_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v12
-; GFX942-NEXT: v_mov_b32_e32 v9, v13
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6343,8 +6237,7 @@ define void @v_shuffle_v2i64_v8i64__4_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6427,8 +6320,7 @@ define void @v_shuffle_v2i64_v8i64__6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v12
-; GFX942-NEXT: v_mov_b32_e32 v15, v13
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6472,8 +6364,7 @@ define void @v_shuffle_v2i64_v8i64__7_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v14
-; GFX942-NEXT: v_mov_b32_e32 v11, v15
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6566,8 +6457,7 @@ define void @v_shuffle_v2i64_v8i64__9_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v18, v12
-; GFX942-NEXT: v_mov_b32_e32 v19, v13
+; GFX942-NEXT: v_mov_b64_e32 v[18:19], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v30, v[16:19], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6622,8 +6512,7 @@ define void @v_shuffle_v2i64_v8i64__10_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v20, v12
-; GFX942-NEXT: v_mov_b32_e32 v21, v13
+; GFX942-NEXT: v_mov_b64_e32 v[20:21], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v30, v[18:21], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6678,8 +6567,7 @@ define void @v_shuffle_v2i64_v8i64__11_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v22, v12
-; GFX942-NEXT: v_mov_b32_e32 v23, v13
+; GFX942-NEXT: v_mov_b64_e32 v[22:23], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v30, v[20:23], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6734,8 +6622,7 @@ define void @v_shuffle_v2i64_v8i64__12_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v24, v12
-; GFX942-NEXT: v_mov_b32_e32 v25, v13
+; GFX942-NEXT: v_mov_b64_e32 v[24:25], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v30, v[22:25], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6790,8 +6677,7 @@ define void @v_shuffle_v2i64_v8i64__13_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v26, v12
-; GFX942-NEXT: v_mov_b32_e32 v27, v13
+; GFX942-NEXT: v_mov_b64_e32 v[26:27], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v30, v[24:27], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6846,8 +6732,7 @@ define void @v_shuffle_v2i64_v8i64__14_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v28, v12
-; GFX942-NEXT: v_mov_b32_e32 v29, v13
+; GFX942-NEXT: v_mov_b64_e32 v[28:29], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v30, v[26:29], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6931,8 +6816,7 @@ define void @v_shuffle_v2i64_v8i64__0_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6976,8 +6860,7 @@ define void @v_shuffle_v2i64_v8i64__1_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7021,8 +6904,7 @@ define void @v_shuffle_v2i64_v8i64__2_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v14
-; GFX942-NEXT: v_mov_b32_e32 v7, v15
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7066,8 +6948,7 @@ define void @v_shuffle_v2i64_v8i64__3_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v14
-; GFX942-NEXT: v_mov_b32_e32 v9, v15
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7111,8 +6992,7 @@ define void @v_shuffle_v2i64_v8i64__4_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v14
-; GFX942-NEXT: v_mov_b32_e32 v11, v15
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7156,8 +7036,7 @@ define void @v_shuffle_v2i64_v8i64__5_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7240,8 +7119,7 @@ define void @v_shuffle_v2i64_v8i64__7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7334,8 +7212,7 @@ define void @v_shuffle_v2i64_v8i64__9_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v20, v14
-; GFX942-NEXT: v_mov_b32_e32 v21, v15
+; GFX942-NEXT: v_mov_b64_e32 v[20:21], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v32, v[18:21], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7390,8 +7267,7 @@ define void @v_shuffle_v2i64_v8i64__10_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v22, v14
-; GFX942-NEXT: v_mov_b32_e32 v23, v15
+; GFX942-NEXT: v_mov_b64_e32 v[22:23], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7446,8 +7322,7 @@ define void @v_shuffle_v2i64_v8i64__11_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v24, v14
-; GFX942-NEXT: v_mov_b32_e32 v25, v15
+; GFX942-NEXT: v_mov_b64_e32 v[24:25], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v32, v[22:25], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7502,8 +7377,7 @@ define void @v_shuffle_v2i64_v8i64__12_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v26, v14
-; GFX942-NEXT: v_mov_b32_e32 v27, v15
+; GFX942-NEXT: v_mov_b64_e32 v[26:27], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7558,8 +7432,7 @@ define void @v_shuffle_v2i64_v8i64__13_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v28, v14
-; GFX942-NEXT: v_mov_b32_e32 v29, v15
+; GFX942-NEXT: v_mov_b64_e32 v[28:29], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v32, v[26:29], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7614,8 +7487,7 @@ define void @v_shuffle_v2i64_v8i64__14_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v30, v14
-; GFX942-NEXT: v_mov_b32_e32 v31, v15
+; GFX942-NEXT: v_mov_b64_e32 v[30:31], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7944,8 +7816,7 @@ define void @v_shuffle_v2i64_v8i64__7_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8000,8 +7871,7 @@ define void @v_shuffle_v2i64_v8i64__9_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8046,8 +7916,7 @@ define void @v_shuffle_v2i64_v8i64__10_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8092,8 +7961,7 @@ define void @v_shuffle_v2i64_v8i64__11_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8138,8 +8006,7 @@ define void @v_shuffle_v2i64_v8i64__12_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8184,8 +8051,7 @@ define void @v_shuffle_v2i64_v8i64__13_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v0
-; GFX942-NEXT: v_mov_b32_e32 v13, v1
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8230,8 +8096,7 @@ define void @v_shuffle_v2i64_v8i64__14_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v0
-; GFX942-NEXT: v_mov_b32_e32 v15, v1
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8326,8 +8191,7 @@ define void @v_shuffle_v2i64_v8i64__0_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8382,8 +8246,7 @@ define void @v_shuffle_v2i64_v8i64__1_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8438,8 +8301,7 @@ define void @v_shuffle_v2i64_v8i64__2_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8494,8 +8356,7 @@ define void @v_shuffle_v2i64_v8i64__3_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8550,8 +8411,7 @@ define void @v_shuffle_v2i64_v8i64__4_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8606,8 +8466,7 @@ define void @v_shuffle_v2i64_v8i64__5_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8662,8 +8521,7 @@ define void @v_shuffle_v2i64_v8i64__6_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v16
-; GFX942-NEXT: v_mov_b32_e32 v15, v17
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8718,8 +8576,7 @@ define void @v_shuffle_v2i64_v8i64__7_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v16
-; GFX942-NEXT: v_mov_b32_e32 v1, v17
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8804,8 +8661,7 @@ define void @v_shuffle_v2i64_v8i64__9_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8850,8 +8706,7 @@ define void @v_shuffle_v2i64_v8i64__10_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8896,8 +8751,7 @@ define void @v_shuffle_v2i64_v8i64__11_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8942,8 +8796,7 @@ define void @v_shuffle_v2i64_v8i64__12_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8988,8 +8841,7 @@ define void @v_shuffle_v2i64_v8i64__13_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v2
-; GFX942-NEXT: v_mov_b32_e32 v13, v3
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9034,8 +8886,7 @@ define void @v_shuffle_v2i64_v8i64__14_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v2
-; GFX942-NEXT: v_mov_b32_e32 v15, v3
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9130,8 +8981,7 @@ define void @v_shuffle_v2i64_v8i64__0_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9186,8 +9036,7 @@ define void @v_shuffle_v2i64_v8i64__1_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9242,8 +9091,7 @@ define void @v_shuffle_v2i64_v8i64__2_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9298,8 +9146,7 @@ define void @v_shuffle_v2i64_v8i64__3_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v12
-; GFX942-NEXT: v_mov_b32_e32 v9, v13
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9354,8 +9201,7 @@ define void @v_shuffle_v2i64_v8i64__4_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v14
-; GFX942-NEXT: v_mov_b32_e32 v11, v15
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9410,8 +9256,7 @@ define void @v_shuffle_v2i64_v8i64__5_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v16
-; GFX942-NEXT: v_mov_b32_e32 v13, v17
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9466,8 +9311,7 @@ define void @v_shuffle_v2i64_v8i64__6_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v18
-; GFX942-NEXT: v_mov_b32_e32 v15, v19
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[18:19]
; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9522,8 +9366,7 @@ define void @v_shuffle_v2i64_v8i64__7_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v16
-; GFX942-NEXT: v_mov_b32_e32 v3, v17
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v18, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9568,8 +9411,7 @@ define void @v_shuffle_v2i64_v8i64__8_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9654,8 +9496,7 @@ define void @v_shuffle_v2i64_v8i64__10_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9700,8 +9541,7 @@ define void @v_shuffle_v2i64_v8i64__11_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9746,8 +9586,7 @@ define void @v_shuffle_v2i64_v8i64__12_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9792,8 +9631,7 @@ define void @v_shuffle_v2i64_v8i64__13_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v4
-; GFX942-NEXT: v_mov_b32_e32 v13, v5
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9838,8 +9676,7 @@ define void @v_shuffle_v2i64_v8i64__14_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v4
-; GFX942-NEXT: v_mov_b32_e32 v15, v5
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9934,8 +9771,7 @@ define void @v_shuffle_v2i64_v8i64__0_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9990,8 +9826,7 @@ define void @v_shuffle_v2i64_v8i64__1_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v10
-; GFX942-NEXT: v_mov_b32_e32 v5, v11
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10046,8 +9881,7 @@ define void @v_shuffle_v2i64_v8i64__2_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v12
-; GFX942-NEXT: v_mov_b32_e32 v7, v13
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10102,8 +9936,7 @@ define void @v_shuffle_v2i64_v8i64__3_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v14
-; GFX942-NEXT: v_mov_b32_e32 v9, v15
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10158,8 +9991,7 @@ define void @v_shuffle_v2i64_v8i64__4_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v16
-; GFX942-NEXT: v_mov_b32_e32 v11, v17
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10214,8 +10046,7 @@ define void @v_shuffle_v2i64_v8i64__5_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v18
-; GFX942-NEXT: v_mov_b32_e32 v13, v19
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[18:19]
; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10270,8 +10101,7 @@ define void @v_shuffle_v2i64_v8i64__6_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v20
-; GFX942-NEXT: v_mov_b32_e32 v15, v21
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[20:21]
; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10326,8 +10156,7 @@ define void @v_shuffle_v2i64_v8i64__7_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v16
-; GFX942-NEXT: v_mov_b32_e32 v5, v17
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v18, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10372,8 +10201,7 @@ define void @v_shuffle_v2i64_v8i64__8_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10418,8 +10246,7 @@ define void @v_shuffle_v2i64_v8i64__9_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10504,8 +10331,7 @@ define void @v_shuffle_v2i64_v8i64__11_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10550,8 +10376,7 @@ define void @v_shuffle_v2i64_v8i64__12_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v6
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10596,8 +10421,7 @@ define void @v_shuffle_v2i64_v8i64__13_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10642,8 +10466,7 @@ define void @v_shuffle_v2i64_v8i64__14_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v6
-; GFX942-NEXT: v_mov_b32_e32 v15, v7
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10738,8 +10561,7 @@ define void @v_shuffle_v2i64_v8i64__0_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10794,8 +10616,7 @@ define void @v_shuffle_v2i64_v8i64__1_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v12
-; GFX942-NEXT: v_mov_b32_e32 v5, v13
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10850,8 +10671,7 @@ define void @v_shuffle_v2i64_v8i64__2_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v14
-; GFX942-NEXT: v_mov_b32_e32 v7, v15
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10906,8 +10726,7 @@ define void @v_shuffle_v2i64_v8i64__3_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v16
-; GFX942-NEXT: v_mov_b32_e32 v9, v17
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10962,8 +10781,7 @@ define void @v_shuffle_v2i64_v8i64__4_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v18
-; GFX942-NEXT: v_mov_b32_e32 v11, v19
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[18:19]
; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11018,8 +10836,7 @@ define void @v_shuffle_v2i64_v8i64__5_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v20
-; GFX942-NEXT: v_mov_b32_e32 v13, v21
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[20:21]
; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11074,8 +10891,7 @@ define void @v_shuffle_v2i64_v8i64__6_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v22
-; GFX942-NEXT: v_mov_b32_e32 v15, v23
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[22:23]
; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11130,8 +10946,7 @@ define void @v_shuffle_v2i64_v8i64__7_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v16
-; GFX942-NEXT: v_mov_b32_e32 v7, v17
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11176,8 +10991,7 @@ define void @v_shuffle_v2i64_v8i64__8_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11222,8 +11036,7 @@ define void @v_shuffle_v2i64_v8i64__9_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11268,8 +11081,7 @@ define void @v_shuffle_v2i64_v8i64__10_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11354,8 +11166,7 @@ define void @v_shuffle_v2i64_v8i64__12_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v8
-; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11400,8 +11211,7 @@ define void @v_shuffle_v2i64_v8i64__13_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v8
-; GFX942-NEXT: v_mov_b32_e32 v13, v9
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11446,8 +11256,7 @@ define void @v_shuffle_v2i64_v8i64__14_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v8
-; GFX942-NEXT: v_mov_b32_e32 v15, v9
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11542,8 +11351,7 @@ define void @v_shuffle_v2i64_v8i64__0_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11598,8 +11406,7 @@ define void @v_shuffle_v2i64_v8i64__1_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11654,8 +11461,7 @@ define void @v_shuffle_v2i64_v8i64__2_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v16
-; GFX942-NEXT: v_mov_b32_e32 v7, v17
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11710,8 +11516,7 @@ define void @v_shuffle_v2i64_v8i64__3_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v18
-; GFX942-NEXT: v_mov_b32_e32 v9, v19
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[18:19]
; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11766,8 +11571,7 @@ define void @v_shuffle_v2i64_v8i64__4_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v20
-; GFX942-NEXT: v_mov_b32_e32 v11, v21
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[20:21]
; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11822,8 +11626,7 @@ define void @v_shuffle_v2i64_v8i64__5_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v22
-; GFX942-NEXT: v_mov_b32_e32 v13, v23
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[22:23]
; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11878,8 +11681,7 @@ define void @v_shuffle_v2i64_v8i64__6_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v24
-; GFX942-NEXT: v_mov_b32_e32 v15, v25
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[24:25]
; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11934,8 +11736,7 @@ define void @v_shuffle_v2i64_v8i64__7_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v16
-; GFX942-NEXT: v_mov_b32_e32 v9, v17
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v18, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11980,8 +11781,7 @@ define void @v_shuffle_v2i64_v8i64__8_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12026,8 +11826,7 @@ define void @v_shuffle_v2i64_v8i64__9_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v10
-; GFX942-NEXT: v_mov_b32_e32 v5, v11
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12072,8 +11871,7 @@ define void @v_shuffle_v2i64_v8i64__10_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12118,8 +11916,7 @@ define void @v_shuffle_v2i64_v8i64__11_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12204,8 +12001,7 @@ define void @v_shuffle_v2i64_v8i64__13_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v10
-; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12250,8 +12046,7 @@ define void @v_shuffle_v2i64_v8i64__14_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v10
-; GFX942-NEXT: v_mov_b32_e32 v15, v11
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12346,8 +12141,7 @@ define void @v_shuffle_v2i64_v8i64__0_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12402,8 +12196,7 @@ define void @v_shuffle_v2i64_v8i64__1_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v16
-; GFX942-NEXT: v_mov_b32_e32 v5, v17
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12458,8 +12251,7 @@ define void @v_shuffle_v2i64_v8i64__2_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v18
-; GFX942-NEXT: v_mov_b32_e32 v7, v19
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[18:19]
; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12514,8 +12306,7 @@ define void @v_shuffle_v2i64_v8i64__3_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v20
-; GFX942-NEXT: v_mov_b32_e32 v9, v21
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[20:21]
; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12570,8 +12361,7 @@ define void @v_shuffle_v2i64_v8i64__4_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v22
-; GFX942-NEXT: v_mov_b32_e32 v11, v23
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[22:23]
; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12626,8 +12416,7 @@ define void @v_shuffle_v2i64_v8i64__5_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v24
-; GFX942-NEXT: v_mov_b32_e32 v13, v25
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[24:25]
; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12682,8 +12471,7 @@ define void @v_shuffle_v2i64_v8i64__6_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v26
-; GFX942-NEXT: v_mov_b32_e32 v15, v27
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[26:27]
; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12738,8 +12526,7 @@ define void @v_shuffle_v2i64_v8i64__7_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v16
-; GFX942-NEXT: v_mov_b32_e32 v11, v17
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v18, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12784,8 +12571,7 @@ define void @v_shuffle_v2i64_v8i64__8_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12830,8 +12616,7 @@ define void @v_shuffle_v2i64_v8i64__9_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v12
-; GFX942-NEXT: v_mov_b32_e32 v5, v13
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12876,8 +12661,7 @@ define void @v_shuffle_v2i64_v8i64__10_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v12
-; GFX942-NEXT: v_mov_b32_e32 v7, v13
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12922,8 +12706,7 @@ define void @v_shuffle_v2i64_v8i64__11_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v12
-; GFX942-NEXT: v_mov_b32_e32 v9, v13
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12968,8 +12751,7 @@ define void @v_shuffle_v2i64_v8i64__12_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13054,8 +12836,7 @@ define void @v_shuffle_v2i64_v8i64__14_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v12
-; GFX942-NEXT: v_mov_b32_e32 v15, v13
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13150,8 +12931,7 @@ define void @v_shuffle_v2i64_v8i64__0_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v16
-; GFX942-NEXT: v_mov_b32_e32 v3, v17
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13206,8 +12986,7 @@ define void @v_shuffle_v2i64_v8i64__1_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v18
-; GFX942-NEXT: v_mov_b32_e32 v5, v19
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[18:19]
; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13262,8 +13041,7 @@ define void @v_shuffle_v2i64_v8i64__2_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v20
-; GFX942-NEXT: v_mov_b32_e32 v7, v21
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[20:21]
; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13318,8 +13096,7 @@ define void @v_shuffle_v2i64_v8i64__3_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v22
-; GFX942-NEXT: v_mov_b32_e32 v9, v23
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[22:23]
; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13374,8 +13151,7 @@ define void @v_shuffle_v2i64_v8i64__4_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v24
-; GFX942-NEXT: v_mov_b32_e32 v11, v25
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[24:25]
; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13430,8 +13206,7 @@ define void @v_shuffle_v2i64_v8i64__5_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v26
-; GFX942-NEXT: v_mov_b32_e32 v13, v27
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[26:27]
; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13486,8 +13261,7 @@ define void @v_shuffle_v2i64_v8i64__6_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v28
-; GFX942-NEXT: v_mov_b32_e32 v15, v29
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[28:29]
; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13542,8 +13316,7 @@ define void @v_shuffle_v2i64_v8i64__7_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v16
-; GFX942-NEXT: v_mov_b32_e32 v13, v17
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v18, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13588,8 +13361,7 @@ define void @v_shuffle_v2i64_v8i64__8_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13634,8 +13406,7 @@ define void @v_shuffle_v2i64_v8i64__9_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13680,8 +13451,7 @@ define void @v_shuffle_v2i64_v8i64__10_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v14
-; GFX942-NEXT: v_mov_b32_e32 v7, v15
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13726,8 +13496,7 @@ define void @v_shuffle_v2i64_v8i64__11_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v14
-; GFX942-NEXT: v_mov_b32_e32 v9, v15
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13772,8 +13541,7 @@ define void @v_shuffle_v2i64_v8i64__12_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v14
-; GFX942-NEXT: v_mov_b32_e32 v11, v15
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13818,8 +13586,7 @@ define void @v_shuffle_v2i64_v8i64__13_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13957,8 +13724,7 @@ define void @s_shuffle_v2i64_v8i64__1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14042,8 +13808,7 @@ define void @s_shuffle_v2i64_v8i64__3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14131,8 +13896,7 @@ define void @s_shuffle_v2i64_v8i64__5_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14221,8 +13985,7 @@ define void @s_shuffle_v2i64_v8i64__7_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14280,8 +14043,7 @@ define void @s_shuffle_v2i64_v8i64__9_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14367,8 +14129,7 @@ define void @s_shuffle_v2i64_v8i64__11_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14458,8 +14219,7 @@ define void @s_shuffle_v2i64_v8i64__13_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14550,8 +14310,7 @@ define void @s_shuffle_v2i64_v8i64__15_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14610,10 +14369,8 @@ define void @s_shuffle_v2i64_v8i64__15_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14668,8 +14425,7 @@ define void @s_shuffle_v2i64_v8i64__15_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s26
-; GFX942-NEXT: s_mov_b32 s9, s27
+; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14750,10 +14506,8 @@ define void @s_shuffle_v2i64_v8i64__15_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14808,8 +14562,7 @@ define void @s_shuffle_v2i64_v8i64__15_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s26
-; GFX942-NEXT: s_mov_b32 s9, s27
+; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14894,10 +14647,8 @@ define void @s_shuffle_v2i64_v8i64__15_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s30
-; GFX942-NEXT: s_mov_b32 s9, s31
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[30:31]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14984,8 +14735,7 @@ define void @s_shuffle_v2i64_v8i64__15_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s26
-; GFX942-NEXT: s_mov_b32 s9, s27
+; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15094,10 +14844,8 @@ define void @s_shuffle_v2i64_v8i64__15_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s30
-; GFX942-NEXT: s_mov_b32 s9, s31
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[30:31]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15212,8 +14960,7 @@ define void @s_shuffle_v2i64_v8i64__15_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s30
-; GFX942-NEXT: s_mov_b32 s13, s31
+; GFX942-NEXT: s_mov_b64 s[12:13], s[30:31]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -15270,10 +15017,8 @@ define void @s_shuffle_v2i64_v8i64__15_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15286,18 +15031,43 @@ define void @s_shuffle_v2i64_v8i64__15_8() {
}
define void @s_shuffle_v2i64_v8i64__15_9() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__15_9:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s22
-; GFX9-NEXT: s_mov_b32 s9, s23
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__15_9:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__15_9:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_9:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 15, i32 9>
@@ -15342,10 +15112,8 @@ define void @s_shuffle_v2i64_v8i64__15_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15358,18 +15126,43 @@ define void @s_shuffle_v2i64_v8i64__15_10() {
}
define void @s_shuffle_v2i64_v8i64__15_11() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__15_11:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s18
-; GFX9-NEXT: s_mov_b32 s9, s19
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__15_11:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__15_11:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_11:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 15, i32 11>
@@ -15378,20 +15171,48 @@ define void @s_shuffle_v2i64_v8i64__15_11() {
}
define void @s_shuffle_v2i64_v8i64__15_12() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__15_12:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s18
-; GFX9-NEXT: s_mov_b32 s9, s19
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__15_12:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__15_12:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_12:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 15, i32 12>
@@ -15436,8 +15257,7 @@ define void @s_shuffle_v2i64_v8i64__15_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15486,10 +15306,8 @@ define void @s_shuffle_v2i64_v8i64__15_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15538,8 +15356,7 @@ define void @s_shuffle_v2i64_v8i64__15_15() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -15586,8 +15403,7 @@ define void @s_shuffle_v2i64_v8i64__u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15599,18 +15415,43 @@ define void @s_shuffle_v2i64_v8i64__u_0() {
}
define void @s_shuffle_v2i64_v8i64__0_0() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -15654,10 +15495,8 @@ define void @s_shuffle_v2i64_v8i64__1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15669,18 +15508,43 @@ define void @s_shuffle_v2i64_v8i64__1_0() {
}
define void @s_shuffle_v2i64_v8i64__2_0() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s4
-; GFX9-NEXT: s_mov_b32 s11, s5
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 2, i32 0>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -15724,10 +15588,8 @@ define void @s_shuffle_v2i64_v8i64__3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15775,8 +15637,7 @@ define void @s_shuffle_v2i64_v8i64__4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15824,10 +15685,8 @@ define void @s_shuffle_v2i64_v8i64__5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15875,8 +15734,7 @@ define void @s_shuffle_v2i64_v8i64__6_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -15926,10 +15784,8 @@ define void @s_shuffle_v2i64_v8i64__7_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15973,8 +15829,7 @@ define void @s_shuffle_v2i64_v8i64__8_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16032,10 +15887,8 @@ define void @s_shuffle_v2i64_v8i64__9_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16136,8 +15989,7 @@ define void @s_shuffle_v2i64_v8i64__10_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16196,10 +16048,8 @@ define void @s_shuffle_v2i64_v8i64__11_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16262,8 +16112,7 @@ define void @s_shuffle_v2i64_v8i64__12_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16328,10 +16177,8 @@ define void @s_shuffle_v2i64_v8i64__13_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16390,8 +16237,7 @@ define void @s_shuffle_v2i64_v8i64__14_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s18, s0
-; GFX942-NEXT: s_mov_b32 s19, s1
+; GFX942-NEXT: s_mov_b64 s[18:19], s[0:1]
; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
@@ -16486,18 +16332,43 @@ define void @s_shuffle_v2i64_v8i64__0_1() {
}
define void @s_shuffle_v2i64_v8i64__1_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -16505,18 +16376,43 @@ define void @s_shuffle_v2i64_v8i64__1_1() {
}
define void @s_shuffle_v2i64_v8i64__2_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s6
-; GFX9-NEXT: s_mov_b32 s11, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -16524,18 +16420,43 @@ define void @s_shuffle_v2i64_v8i64__2_1() {
}
define void @s_shuffle_v2i64_v8i64__3_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__3_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__3_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__3_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 3, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -16579,8 +16500,7 @@ define void @s_shuffle_v2i64_v8i64__4_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16592,18 +16512,43 @@ define void @s_shuffle_v2i64_v8i64__4_1() {
}
define void @s_shuffle_v2i64_v8i64__5_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__5_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s18
-; GFX9-NEXT: s_mov_b32 s9, s19
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__5_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__5_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 5, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -16647,8 +16592,7 @@ define void @s_shuffle_v2i64_v8i64__6_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -16662,18 +16606,43 @@ define void @s_shuffle_v2i64_v8i64__6_1() {
}
define void @s_shuffle_v2i64_v8i64__7_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__7_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s22
-; GFX9-NEXT: s_mov_b32 s9, s23
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__7_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__7_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 7, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -16763,8 +16732,7 @@ define void @s_shuffle_v2i64_v8i64__9_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16865,8 +16833,7 @@ define void @s_shuffle_v2i64_v8i64__10_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16921,8 +16888,7 @@ define void @s_shuffle_v2i64_v8i64__11_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16985,8 +16951,7 @@ define void @s_shuffle_v2i64_v8i64__12_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17047,8 +17012,7 @@ define void @s_shuffle_v2i64_v8i64__13_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17107,8 +17071,7 @@ define void @s_shuffle_v2i64_v8i64__14_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s18, s2
-; GFX942-NEXT: s_mov_b32 s19, s3
+; GFX942-NEXT: s_mov_b64 s[18:19], s[2:3]
; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
@@ -17155,8 +17118,7 @@ define void @s_shuffle_v2i64_v8i64__u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17168,18 +17130,43 @@ define void @s_shuffle_v2i64_v8i64__u_2() {
}
define void @s_shuffle_v2i64_v8i64__0_2() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 0, i32 2>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -17223,10 +17210,8 @@ define void @s_shuffle_v2i64_v8i64__1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17238,18 +17223,43 @@ define void @s_shuffle_v2i64_v8i64__1_2() {
}
define void @s_shuffle_v2i64_v8i64__2_2() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -17293,10 +17303,8 @@ define void @s_shuffle_v2i64_v8i64__3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17344,8 +17352,7 @@ define void @s_shuffle_v2i64_v8i64__4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17393,10 +17400,8 @@ define void @s_shuffle_v2i64_v8i64__5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17444,8 +17449,7 @@ define void @s_shuffle_v2i64_v8i64__6_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -17495,10 +17499,8 @@ define void @s_shuffle_v2i64_v8i64__7_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17542,8 +17544,7 @@ define void @s_shuffle_v2i64_v8i64__8_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17623,10 +17624,8 @@ define void @s_shuffle_v2i64_v8i64__9_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17708,8 +17707,7 @@ define void @s_shuffle_v2i64_v8i64__10_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17796,10 +17794,8 @@ define void @s_shuffle_v2i64_v8i64__11_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17858,8 +17854,7 @@ define void @s_shuffle_v2i64_v8i64__12_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17940,10 +17935,8 @@ define void @s_shuffle_v2i64_v8i64__13_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -18002,8 +17995,7 @@ define void @s_shuffle_v2i64_v8i64__14_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s22, s4
-; GFX942-NEXT: s_mov_b32 s23, s5
+; GFX942-NEXT: s_mov_b64 s[22:23], s[4:5]
; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21]
; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
@@ -18058,37 +18050,87 @@ define void @s_shuffle_v2i64_v8i64__u_3() {
}
define void @s_shuffle_v2i64_v8i64__0_3() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <8 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 0, i32 3>
- call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v2i64_v8i64__1_3() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__1_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s6
-; GFX9-NEXT: s_mov_b32 s9, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <8 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 0, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v2i64_v8i64__1_3() {
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__1_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__1_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 1, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -18136,18 +18178,43 @@ define void @s_shuffle_v2i64_v8i64__2_3() {
}
define void @s_shuffle_v2i64_v8i64__3_3() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -18191,8 +18258,7 @@ define void @s_shuffle_v2i64_v8i64__4_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -18204,18 +18270,43 @@ define void @s_shuffle_v2i64_v8i64__4_3() {
}
define void @s_shuffle_v2i64_v8i64__5_3() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__5_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__5_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__5_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 5, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -18259,8 +18350,7 @@ define void @s_shuffle_v2i64_v8i64__6_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -18274,18 +18364,43 @@ define void @s_shuffle_v2i64_v8i64__6_3() {
}
define void @s_shuffle_v2i64_v8i64__7_3() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__7_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s18
-; GFX9-NEXT: s_mov_b32 s9, s19
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__7_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__7_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 7, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -18375,8 +18490,7 @@ define void @s_shuffle_v2i64_v8i64__9_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -18458,8 +18572,7 @@ define void @s_shuffle_v2i64_v8i64__10_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s22
-; GFX942-NEXT: s_mov_b32 s11, s23
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -18520,8 +18633,7 @@ define void @s_shuffle_v2i64_v8i64__11_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -18580,8 +18692,7 @@ define void @s_shuffle_v2i64_v8i64__12_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -18636,8 +18747,7 @@ define void @s_shuffle_v2i64_v8i64__13_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -18696,8 +18806,7 @@ define void @s_shuffle_v2i64_v8i64__14_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s22, s6
-; GFX942-NEXT: s_mov_b32 s23, s7
+; GFX942-NEXT: s_mov_b64 s[22:23], s[6:7]
; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21]
; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
@@ -18744,8 +18853,7 @@ define void @s_shuffle_v2i64_v8i64__u_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s8
-; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -18757,18 +18865,43 @@ define void @s_shuffle_v2i64_v8i64__u_4() {
}
define void @s_shuffle_v2i64_v8i64__0_4() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s16
-; GFX9-NEXT: s_mov_b32 s11, s17
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 0, i32 4>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -18776,20 +18909,48 @@ define void @s_shuffle_v2i64_v8i64__0_4() {
}
define void @s_shuffle_v2i64_v8i64__1_4() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__1_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s6
-; GFX9-NEXT: s_mov_b32 s9, s7
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__1_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__1_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 1, i32 4>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -18797,18 +18958,43 @@ define void @s_shuffle_v2i64_v8i64__1_4() {
}
define void @s_shuffle_v2i64_v8i64__2_4() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 2, i32 4>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -18816,90 +19002,145 @@ define void @s_shuffle_v2i64_v8i64__2_4() {
}
define void @s_shuffle_v2i64_v8i64__3_4() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__3_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__3_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__3_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <8 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 3, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v2i64_v8i64__4_4() {
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 3, i32 4>
+ %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
ret void
}
-define void @s_shuffle_v2i64_v8i64__4_4() {
-; GFX900-LABEL: s_shuffle_v2i64_v8i64__4_4:
+define void @s_shuffle_v2i64_v8i64__5_4() {
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13]
-; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v2i64_v8i64__4_4:
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13]
-; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_4:
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:15]
+; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s8
-; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <8 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 4, i32 4>
- call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v2i64_v8i64__5_4() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__5_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 5, i32 4>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -18943,8 +19184,7 @@ define void @s_shuffle_v2i64_v8i64__6_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s8
-; GFX942-NEXT: s_mov_b32 s15, s9
+; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -18958,20 +19198,48 @@ define void @s_shuffle_v2i64_v8i64__6_4() {
}
define void @s_shuffle_v2i64_v8i64__7_4() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__7_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s18
-; GFX9-NEXT: s_mov_b32 s9, s19
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__7_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__7_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 7, i32 4>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -19011,8 +19279,7 @@ define void @s_shuffle_v2i64_v8i64__8_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s8
-; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -19096,10 +19363,8 @@ define void @s_shuffle_v2i64_v8i64__9_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -19160,8 +19425,7 @@ define void @s_shuffle_v2i64_v8i64__10_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -19246,10 +19510,8 @@ define void @s_shuffle_v2i64_v8i64__11_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -19336,8 +19598,7 @@ define void @s_shuffle_v2i64_v8i64__12_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -19422,10 +19683,8 @@ define void @s_shuffle_v2i64_v8i64__13_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s26
-; GFX942-NEXT: s_mov_b32 s9, s27
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -19512,8 +19771,7 @@ define void @s_shuffle_v2i64_v8i64__14_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s26, s8
-; GFX942-NEXT: s_mov_b32 s27, s9
+; GFX942-NEXT: s_mov_b64 s[26:27], s[8:9]
; GFX942-NEXT: s_mov_b64 s[8:9], s[24:25]
; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
@@ -19572,18 +19830,43 @@ define void @s_shuffle_v2i64_v8i64__u_5() {
}
define void @s_shuffle_v2i64_v8i64__0_5() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s18
-; GFX9-NEXT: s_mov_b32 s11, s19
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 0, i32 5>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -19627,8 +19910,7 @@ define void @s_shuffle_v2i64_v8i64__1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -19640,18 +19922,43 @@ define void @s_shuffle_v2i64_v8i64__1_5() {
}
define void @s_shuffle_v2i64_v8i64__2_5() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 2, i32 5>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -19695,8 +20002,7 @@ define void @s_shuffle_v2i64_v8i64__3_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -19788,8 +20094,7 @@ define void @s_shuffle_v2i64_v8i64__5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -19837,8 +20142,7 @@ define void @s_shuffle_v2i64_v8i64__6_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -19888,8 +20192,7 @@ define void @s_shuffle_v2i64_v8i64__7_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20013,8 +20316,7 @@ define void @s_shuffle_v2i64_v8i64__9_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20069,8 +20371,7 @@ define void @s_shuffle_v2i64_v8i64__10_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s22
-; GFX942-NEXT: s_mov_b32 s11, s23
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20151,8 +20452,7 @@ define void @s_shuffle_v2i64_v8i64__11_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20233,8 +20533,7 @@ define void @s_shuffle_v2i64_v8i64__12_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20315,8 +20614,7 @@ define void @s_shuffle_v2i64_v8i64__13_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20397,8 +20695,7 @@ define void @s_shuffle_v2i64_v8i64__14_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s26, s10
-; GFX942-NEXT: s_mov_b32 s27, s11
+; GFX942-NEXT: s_mov_b64 s[26:27], s[10:11]
; GFX942-NEXT: s_mov_b64 s[8:9], s[24:25]
; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
@@ -20445,8 +20742,7 @@ define void @s_shuffle_v2i64_v8i64__u_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20458,18 +20754,43 @@ define void @s_shuffle_v2i64_v8i64__u_6() {
}
define void @s_shuffle_v2i64_v8i64__0_6() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s20
-; GFX9-NEXT: s_mov_b32 s11, s21
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 0, i32 6>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -20511,35 +20832,58 @@ define void @s_shuffle_v2i64_v8i64__1_6() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:15]
+; GFX942-NEXT: ; def s[0:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <8 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 1, i32 6>
+ call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v2i64_v8i64__2_6() {
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <8 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 1, i32 6>
- call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v2i64_v8i64__2_6() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s16
-; GFX9-NEXT: s_mov_b32 s11, s17
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 2, i32 6>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -20583,10 +20927,8 @@ define void @s_shuffle_v2i64_v8i64__3_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20634,8 +20976,7 @@ define void @s_shuffle_v2i64_v8i64__4_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20683,10 +21024,8 @@ define void @s_shuffle_v2i64_v8i64__5_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20734,8 +21073,7 @@ define void @s_shuffle_v2i64_v8i64__6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -20785,10 +21123,8 @@ define void @s_shuffle_v2i64_v8i64__7_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20832,8 +21168,7 @@ define void @s_shuffle_v2i64_v8i64__8_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20941,10 +21276,8 @@ define void @s_shuffle_v2i64_v8i64__9_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -21005,8 +21338,7 @@ define void @s_shuffle_v2i64_v8i64__10_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -21115,10 +21447,8 @@ define void @s_shuffle_v2i64_v8i64__11_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -21229,8 +21559,7 @@ define void @s_shuffle_v2i64_v8i64__12_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -21339,10 +21668,8 @@ define void @s_shuffle_v2i64_v8i64__13_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s26
-; GFX942-NEXT: s_mov_b32 s9, s27
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -21457,8 +21784,7 @@ define void @s_shuffle_v2i64_v8i64__14_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s30, s12
-; GFX942-NEXT: s_mov_b32 s31, s13
+; GFX942-NEXT: s_mov_b64 s[30:31], s[12:13]
; GFX942-NEXT: s_mov_b64 s[8:9], s[28:29]
; GFX942-NEXT: s_mov_b64 s[10:11], s[30:31]
; GFX942-NEXT: ;;#ASMSTART
@@ -21524,18 +21850,43 @@ define void @s_shuffle_v2i64_v8i64__u_7() {
}
define void @s_shuffle_v2i64_v8i64__0_7() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s22
-; GFX9-NEXT: s_mov_b32 s11, s23
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 0, i32 7>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -21579,8 +21930,7 @@ define void @s_shuffle_v2i64_v8i64__1_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -21594,18 +21944,43 @@ define void @s_shuffle_v2i64_v8i64__1_7() {
}
define void @s_shuffle_v2i64_v8i64__2_7() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s18
-; GFX9-NEXT: s_mov_b32 s11, s19
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 2, i32 7>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -21649,8 +22024,7 @@ define void @s_shuffle_v2i64_v8i64__3_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -21700,8 +22074,7 @@ define void @s_shuffle_v2i64_v8i64__4_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -21749,8 +22122,7 @@ define void @s_shuffle_v2i64_v8i64__5_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -21845,8 +22217,7 @@ define void @s_shuffle_v2i64_v8i64__7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -22001,8 +22372,7 @@ define void @s_shuffle_v2i64_v8i64__9_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s18
-; GFX942-NEXT: s_mov_b32 s13, s19
+; GFX942-NEXT: s_mov_b64 s[12:13], s[18:19]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -22065,8 +22435,7 @@ define void @s_shuffle_v2i64_v8i64__10_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s22
-; GFX942-NEXT: s_mov_b32 s11, s23
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -22175,8 +22544,7 @@ define void @s_shuffle_v2i64_v8i64__11_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s22
-; GFX942-NEXT: s_mov_b32 s13, s23
+; GFX942-NEXT: s_mov_b64 s[12:13], s[22:23]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -22289,8 +22657,7 @@ define void @s_shuffle_v2i64_v8i64__12_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -22399,8 +22766,7 @@ define void @s_shuffle_v2i64_v8i64__13_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s26
-; GFX942-NEXT: s_mov_b32 s13, s27
+; GFX942-NEXT: s_mov_b64 s[12:13], s[26:27]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -22517,8 +22883,7 @@ define void @s_shuffle_v2i64_v8i64__14_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s30, s14
-; GFX942-NEXT: s_mov_b32 s31, s15
+; GFX942-NEXT: s_mov_b64 s[30:31], s[14:15]
; GFX942-NEXT: s_mov_b64 s[8:9], s[28:29]
; GFX942-NEXT: s_mov_b64 s[10:11], s[30:31]
; GFX942-NEXT: ;;#ASMSTART
@@ -22625,8 +22990,7 @@ define void @s_shuffle_v2i64_v8i64__1_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -22710,8 +23074,7 @@ define void @s_shuffle_v2i64_v8i64__3_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -22799,8 +23162,7 @@ define void @s_shuffle_v2i64_v8i64__5_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -22889,8 +23251,7 @@ define void @s_shuffle_v2i64_v8i64__7_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -22952,10 +23313,8 @@ define void @s_shuffle_v2i64_v8i64__9_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -22968,18 +23327,43 @@ define void @s_shuffle_v2i64_v8i64__9_8() {
}
define void @s_shuffle_v2i64_v8i64__10_8() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_8:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s4
-; GFX9-NEXT: s_mov_b32 s11, s5
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_8:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_8:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_8:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 10, i32 8>
@@ -23024,10 +23408,8 @@ define void @s_shuffle_v2i64_v8i64__11_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23076,8 +23458,7 @@ define void @s_shuffle_v2i64_v8i64__12_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23126,10 +23507,8 @@ define void @s_shuffle_v2i64_v8i64__13_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23178,8 +23557,7 @@ define void @s_shuffle_v2i64_v8i64__14_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -23277,8 +23655,7 @@ define void @s_shuffle_v2i64_v8i64__0_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23333,8 +23710,7 @@ define void @s_shuffle_v2i64_v8i64__1_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23389,8 +23765,7 @@ define void @s_shuffle_v2i64_v8i64__2_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23491,8 +23866,7 @@ define void @s_shuffle_v2i64_v8i64__3_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23573,8 +23947,7 @@ define void @s_shuffle_v2i64_v8i64__4_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23656,8 +24029,7 @@ define void @s_shuffle_v2i64_v8i64__5_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s26
-; GFX942-NEXT: s_mov_b32 s9, s27
+; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23772,8 +24144,7 @@ define void @s_shuffle_v2i64_v8i64__6_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s18
-; GFX942-NEXT: s_mov_b32 s15, s19
+; GFX942-NEXT: s_mov_b64 s[14:15], s[18:19]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -23836,8 +24207,7 @@ define void @s_shuffle_v2i64_v8i64__7_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s26
-; GFX942-NEXT: s_mov_b32 s9, s27
+; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23891,18 +24261,43 @@ define void @s_shuffle_v2i64_v8i64__8_9() {
}
define void @s_shuffle_v2i64_v8i64__9_9() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__9_9:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__9_9:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__9_9:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_9:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 9, i32 9>
@@ -23911,18 +24306,43 @@ define void @s_shuffle_v2i64_v8i64__9_9() {
}
define void @s_shuffle_v2i64_v8i64__10_9() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_9:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s6
-; GFX9-NEXT: s_mov_b32 s11, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_9:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_9:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_9:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 10, i32 9>
@@ -23931,18 +24351,43 @@ define void @s_shuffle_v2i64_v8i64__10_9() {
}
define void @s_shuffle_v2i64_v8i64__11_9() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__11_9:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__11_9:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__11_9:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_9:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 11, i32 9>
@@ -23987,8 +24432,7 @@ define void @s_shuffle_v2i64_v8i64__12_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24001,18 +24445,43 @@ define void @s_shuffle_v2i64_v8i64__12_9() {
}
define void @s_shuffle_v2i64_v8i64__13_9() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__13_9:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s18
-; GFX9-NEXT: s_mov_b32 s9, s19
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__13_9:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__13_9:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_9:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 13, i32 9>
@@ -24057,8 +24526,7 @@ define void @s_shuffle_v2i64_v8i64__14_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -24105,8 +24573,7 @@ define void @s_shuffle_v2i64_v8i64__u_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24161,8 +24628,7 @@ define void @s_shuffle_v2i64_v8i64__0_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24221,10 +24687,8 @@ define void @s_shuffle_v2i64_v8i64__1_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24279,8 +24743,7 @@ define void @s_shuffle_v2i64_v8i64__2_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24339,10 +24802,8 @@ define void @s_shuffle_v2i64_v8i64__3_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24423,8 +24884,7 @@ define void @s_shuffle_v2i64_v8i64__4_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24501,13 +24961,11 @@ define void @s_shuffle_v2i64_v8i64__5_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24616,8 +25074,7 @@ define void @s_shuffle_v2i64_v8i64__6_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s20
-; GFX942-NEXT: s_mov_b32 s15, s21
+; GFX942-NEXT: s_mov_b64 s[14:15], s[20:21]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -24734,10 +25191,8 @@ define void @s_shuffle_v2i64_v8i64__7_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24756,18 +25211,43 @@ define void @s_shuffle_v2i64_v8i64__7_10() {
}
define void @s_shuffle_v2i64_v8i64__8_10() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__8_10:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__8_10:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__8_10:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_10:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 8, i32 10>
@@ -24812,10 +25292,8 @@ define void @s_shuffle_v2i64_v8i64__9_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24828,18 +25306,43 @@ define void @s_shuffle_v2i64_v8i64__9_10() {
}
define void @s_shuffle_v2i64_v8i64__10_10() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_10:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_10:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_10:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_10:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 10, i32 10>
@@ -24884,10 +25387,8 @@ define void @s_shuffle_v2i64_v8i64__11_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24936,8 +25437,7 @@ define void @s_shuffle_v2i64_v8i64__12_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24986,10 +25486,8 @@ define void @s_shuffle_v2i64_v8i64__13_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -25038,8 +25536,7 @@ define void @s_shuffle_v2i64_v8i64__14_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -25137,8 +25634,7 @@ define void @s_shuffle_v2i64_v8i64__0_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -25239,8 +25735,7 @@ define void @s_shuffle_v2i64_v8i64__1_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -25295,8 +25790,7 @@ define void @s_shuffle_v2i64_v8i64__2_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -25378,8 +25872,7 @@ define void @s_shuffle_v2i64_v8i64__3_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -25466,8 +25959,7 @@ define void @s_shuffle_v2i64_v8i64__4_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -25522,8 +26014,7 @@ define void @s_shuffle_v2i64_v8i64__5_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -25632,8 +26123,7 @@ define void @s_shuffle_v2i64_v8i64__6_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s22
-; GFX942-NEXT: s_mov_b32 s15, s23
+; GFX942-NEXT: s_mov_b64 s[14:15], s[22:23]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -25696,8 +26186,7 @@ define void @s_shuffle_v2i64_v8i64__7_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -25710,18 +26199,43 @@ define void @s_shuffle_v2i64_v8i64__7_11() {
}
define void @s_shuffle_v2i64_v8i64__8_11() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__8_11:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__8_11:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__8_11:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_11:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 8, i32 11>
@@ -25730,18 +26244,43 @@ define void @s_shuffle_v2i64_v8i64__8_11() {
}
define void @s_shuffle_v2i64_v8i64__9_11() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__9_11:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s6
-; GFX9-NEXT: s_mov_b32 s9, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__9_11:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__9_11:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_11:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 9, i32 11>
@@ -25791,18 +26330,43 @@ define void @s_shuffle_v2i64_v8i64__10_11() {
}
define void @s_shuffle_v2i64_v8i64__11_11() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__11_11:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__11_11:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__11_11:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_11:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 11, i32 11>
@@ -25847,8 +26411,7 @@ define void @s_shuffle_v2i64_v8i64__12_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -25861,18 +26424,43 @@ define void @s_shuffle_v2i64_v8i64__12_11() {
}
define void @s_shuffle_v2i64_v8i64__13_11() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__13_11:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__13_11:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__13_11:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_11:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 13, i32 11>
@@ -25917,8 +26505,7 @@ define void @s_shuffle_v2i64_v8i64__14_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -25965,8 +26552,7 @@ define void @s_shuffle_v2i64_v8i64__u_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s8
-; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -26021,8 +26607,7 @@ define void @s_shuffle_v2i64_v8i64__0_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -26081,10 +26666,8 @@ define void @s_shuffle_v2i64_v8i64__1_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -26139,8 +26722,7 @@ define void @s_shuffle_v2i64_v8i64__2_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -26199,10 +26781,8 @@ define void @s_shuffle_v2i64_v8i64__3_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -26283,8 +26863,7 @@ define void @s_shuffle_v2i64_v8i64__4_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -26361,13 +26940,11 @@ define void @s_shuffle_v2i64_v8i64__5_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -26476,8 +27053,7 @@ define void @s_shuffle_v2i64_v8i64__6_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s24
-; GFX942-NEXT: s_mov_b32 s15, s25
+; GFX942-NEXT: s_mov_b64 s[14:15], s[24:25]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -26539,159 +27115,263 @@ define void @s_shuffle_v2i64_v8i64__7_12() {
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v2i64_v8i64__7_12:
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__7_12:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
+; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
+; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[36:51]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s44
+; GFX90A-NEXT: s_mov_b32 s11, s45
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
+; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
+; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_12:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-NEXT: v_writelane_b32 v0, s30, 0
+; GFX942-NEXT: v_writelane_b32 v0, s31, 1
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[16:31]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[24:25]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_readlane_b32 s31, v0, 1
+; GFX942-NEXT: v_readlane_b32 s30, v0, 0
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <8 x i64> asm "; def $0", "=s"()
+ %vec1 = call <8 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 7, i32 12>
+ call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v2i64_v8i64__8_12() {
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__8_12:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__8_12:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_12:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <8 x i64> asm "; def $0", "=s"()
+ %vec1 = call <8 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 8, i32 12>
+ call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v2i64_v8i64__9_12() {
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__9_12:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__9_12:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_12:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <8 x i64> asm "; def $0", "=s"()
+ %vec1 = call <8 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 9, i32 12>
+ call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v2i64_v8i64__10_12() {
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_12:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_12:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_12:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <8 x i64> asm "; def $0", "=s"()
+ %vec1 = call <8 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 10, i32 12>
+ call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v2i64_v8i64__11_12() {
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__11_12:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__11_12:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
-; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
-; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[36:51]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s44
-; GFX90A-NEXT: s_mov_b32 s11, s45
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
-; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
-; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
-; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
-; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_12:
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_12:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
-; GFX942-NEXT: v_writelane_b32 v0, s30, 0
-; GFX942-NEXT: v_writelane_b32 v0, s31, 1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:15]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:31]
+; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s24
-; GFX942-NEXT: s_mov_b32 s11, s25
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_readlane_b32 s31, v0, 1
-; GFX942-NEXT: v_readlane_b32 s30, v0, 0
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
-; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <8 x i64> asm "; def $0", "=s"()
- %vec1 = call <8 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 7, i32 12>
- call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v2i64_v8i64__8_12() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__8_12:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s16
-; GFX9-NEXT: s_mov_b32 s11, s17
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <8 x i64> asm "; def $0", "=s"()
- %vec1 = call <8 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 8, i32 12>
- call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v2i64_v8i64__9_12() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__9_12:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s6
-; GFX9-NEXT: s_mov_b32 s9, s7
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <8 x i64> asm "; def $0", "=s"()
- %vec1 = call <8 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 9, i32 12>
- call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v2i64_v8i64__10_12() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_12:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <8 x i64> asm "; def $0", "=s"()
- %vec1 = call <8 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 10, i32 12>
- call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v2i64_v8i64__11_12() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__11_12:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 11, i32 12>
@@ -26736,8 +27416,7 @@ define void @s_shuffle_v2i64_v8i64__12_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s8
-; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -26750,20 +27429,48 @@ define void @s_shuffle_v2i64_v8i64__12_12() {
}
define void @s_shuffle_v2i64_v8i64__13_12() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__13_12:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__13_12:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__13_12:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_12:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 13, i32 12>
@@ -26808,8 +27515,7 @@ define void @s_shuffle_v2i64_v8i64__14_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s8
-; GFX942-NEXT: s_mov_b32 s15, s9
+; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -26911,8 +27617,7 @@ define void @s_shuffle_v2i64_v8i64__0_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s22
-; GFX942-NEXT: s_mov_b32 s11, s23
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -26975,8 +27680,7 @@ define void @s_shuffle_v2i64_v8i64__1_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27037,8 +27741,7 @@ define void @s_shuffle_v2i64_v8i64__2_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s22
-; GFX942-NEXT: s_mov_b32 s11, s23
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27097,8 +27800,7 @@ define void @s_shuffle_v2i64_v8i64__3_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27179,8 +27881,7 @@ define void @s_shuffle_v2i64_v8i64__4_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s22
-; GFX942-NEXT: s_mov_b32 s11, s23
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27261,8 +27962,7 @@ define void @s_shuffle_v2i64_v8i64__5_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27371,8 +28071,7 @@ define void @s_shuffle_v2i64_v8i64__6_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s26
-; GFX942-NEXT: s_mov_b32 s15, s27
+; GFX942-NEXT: s_mov_b64 s[14:15], s[26:27]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -27485,8 +28184,7 @@ define void @s_shuffle_v2i64_v8i64__7_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27499,18 +28197,43 @@ define void @s_shuffle_v2i64_v8i64__7_13() {
}
define void @s_shuffle_v2i64_v8i64__8_13() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__8_13:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s18
-; GFX9-NEXT: s_mov_b32 s11, s19
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__8_13:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__8_13:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_13:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 8, i32 13>
@@ -27525,62 +28248,86 @@ define void @s_shuffle_v2i64_v8i64__9_13() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13]
-; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__9_13:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_13:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <8 x i64> asm "; def $0", "=s"()
+ %vec1 = call <8 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 9, i32 13>
+ call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v2i64_v8i64__10_13() {
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_13:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v2i64_v8i64__9_13:
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_13:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13]
-; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_13:
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_13:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:15]
+; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <8 x i64> asm "; def $0", "=s"()
- %vec1 = call <8 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 9, i32 13>
- call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v2i64_v8i64__10_13() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_13:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 10, i32 13>
@@ -27625,8 +28372,7 @@ define void @s_shuffle_v2i64_v8i64__11_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27720,8 +28466,7 @@ define void @s_shuffle_v2i64_v8i64__13_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27770,8 +28515,7 @@ define void @s_shuffle_v2i64_v8i64__14_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -27818,8 +28562,7 @@ define void @s_shuffle_v2i64_v8i64__u_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27874,8 +28617,7 @@ define void @s_shuffle_v2i64_v8i64__0_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s24
-; GFX942-NEXT: s_mov_b32 s11, s25
+; GFX942-NEXT: s_mov_b64 s[10:11], s[24:25]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27934,10 +28676,8 @@ define void @s_shuffle_v2i64_v8i64__1_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27992,8 +28732,7 @@ define void @s_shuffle_v2i64_v8i64__2_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s24
-; GFX942-NEXT: s_mov_b32 s11, s25
+; GFX942-NEXT: s_mov_b64 s[10:11], s[24:25]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28052,10 +28791,8 @@ define void @s_shuffle_v2i64_v8i64__3_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28136,8 +28873,7 @@ define void @s_shuffle_v2i64_v8i64__4_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s24
-; GFX942-NEXT: s_mov_b32 s11, s25
+; GFX942-NEXT: s_mov_b64 s[10:11], s[24:25]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28214,13 +28950,11 @@ define void @s_shuffle_v2i64_v8i64__5_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s24
-; GFX942-NEXT: s_mov_b32 s11, s25
+; GFX942-NEXT: s_mov_b64 s[10:11], s[24:25]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28329,8 +29063,7 @@ define void @s_shuffle_v2i64_v8i64__6_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s28
-; GFX942-NEXT: s_mov_b32 s15, s29
+; GFX942-NEXT: s_mov_b64 s[14:15], s[28:29]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -28447,10 +29180,8 @@ define void @s_shuffle_v2i64_v8i64__7_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s28
-; GFX942-NEXT: s_mov_b32 s11, s29
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[28:29]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28469,18 +29200,43 @@ define void @s_shuffle_v2i64_v8i64__7_14() {
}
define void @s_shuffle_v2i64_v8i64__8_14() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__8_14:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s20
-; GFX9-NEXT: s_mov_b32 s11, s21
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__8_14:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__8_14:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_14:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 8, i32 14>
@@ -28525,10 +29281,8 @@ define void @s_shuffle_v2i64_v8i64__9_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28541,18 +29295,43 @@ define void @s_shuffle_v2i64_v8i64__9_14() {
}
define void @s_shuffle_v2i64_v8i64__10_14() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_14:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s16
-; GFX9-NEXT: s_mov_b32 s11, s17
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_14:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_14:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_14:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 10, i32 14>
@@ -28597,10 +29376,8 @@ define void @s_shuffle_v2i64_v8i64__11_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28649,8 +29426,7 @@ define void @s_shuffle_v2i64_v8i64__12_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28699,10 +29475,8 @@ define void @s_shuffle_v2i64_v8i64__13_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28751,8 +29525,7 @@ define void @s_shuffle_v2i64_v8i64__14_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -28855,8 +29628,7 @@ define void @s_shuffle_v2i64_v8i64__0_15() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s26
-; GFX942-NEXT: s_mov_b32 s11, s27
+; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28908,20 +29680,29 @@ define void @s_shuffle_v2i64_v8i64__1_15() {
; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_15:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-NEXT: v_writelane_b32 v0, s30, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_writelane_b32 v0, s31, 1
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s16, s2
-; GFX942-NEXT: s_mov_b32 s17, s3
-; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
-; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_readlane_b32 s31, v0, 1
+; GFX942-NEXT: v_readlane_b32 s30, v0, 0
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
@@ -28973,8 +29754,7 @@ define void @s_shuffle_v2i64_v8i64__2_15() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s26
-; GFX942-NEXT: s_mov_b32 s11, s27
+; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -29027,16 +29807,15 @@ define void @s_shuffle_v2i64_v8i64__3_15() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:15]
+; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s20, s6
-; GFX942-NEXT: s_mov_b32 s21, s7
-; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21]
-; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -29117,8 +29896,7 @@ define void @s_shuffle_v2i64_v8i64__4_15() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s26
-; GFX942-NEXT: s_mov_b32 s11, s27
+; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -29193,16 +29971,15 @@ define void @s_shuffle_v2i64_v8i64__5_15() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:15]
+; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:27]
+; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s24, s10
-; GFX942-NEXT: s_mov_b32 s25, s11
-; GFX942-NEXT: s_mov_b64 s[8:9], s[24:25]
-; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -29311,8 +30088,7 @@ define void @s_shuffle_v2i64_v8i64__6_15() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s30
-; GFX942-NEXT: s_mov_b32 s15, s31
+; GFX942-NEXT: s_mov_b64 s[14:15], s[30:31]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -29418,30 +30194,19 @@ define void @s_shuffle_v2i64_v8i64__7_15() {
; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_15:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
-; GFX942-NEXT: v_writelane_b32 v0, s30, 0
-; GFX942-NEXT: v_writelane_b32 v0, s31, 1
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:31]
+; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s28, s14
-; GFX942-NEXT: s_mov_b32 s29, s15
-; GFX942-NEXT: s_mov_b64 s[8:9], s[28:29]
-; GFX942-NEXT: s_mov_b64 s[10:11], s[30:31]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_readlane_b32 s31, v0, 1
-; GFX942-NEXT: v_readlane_b32 s30, v0, 0
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
-; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
@@ -29451,18 +30216,43 @@ define void @s_shuffle_v2i64_v8i64__7_15() {
}
define void @s_shuffle_v2i64_v8i64__8_15() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__8_15:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s22
-; GFX9-NEXT: s_mov_b32 s11, s23
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__8_15:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__8_15:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_15:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 8, i32 15>
@@ -29507,8 +30297,7 @@ define void @s_shuffle_v2i64_v8i64__9_15() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -29523,18 +30312,43 @@ define void @s_shuffle_v2i64_v8i64__9_15() {
}
define void @s_shuffle_v2i64_v8i64__10_15() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_15:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s18
-; GFX9-NEXT: s_mov_b32 s11, s19
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_15:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_15:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_15:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 10, i32 15>
@@ -29579,8 +30393,7 @@ define void @s_shuffle_v2i64_v8i64__11_15() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -29631,8 +30444,7 @@ define void @s_shuffle_v2i64_v8i64__12_15() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -29681,8 +30493,7 @@ define void @s_shuffle_v2i64_v8i64__13_15() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll
index 7f8f2dbbb09a1..54e700625d72c 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll
@@ -88,8 +88,7 @@ define void @v_shuffle_v2p0_v2p0__1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -144,8 +143,7 @@ define void @v_shuffle_v2p0_v2p0__3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -204,10 +202,8 @@ define void @v_shuffle_v2p0_v2p0__3_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -262,8 +258,7 @@ define void @v_shuffle_v2p0_v2p0__3_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -309,8 +304,7 @@ define void @v_shuffle_v2p0_v2p0__3_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -355,8 +349,7 @@ define void @v_shuffle_v2p0_v2p0__3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -401,8 +394,7 @@ define void @v_shuffle_v2p0_v2p0__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -446,8 +438,7 @@ define void @v_shuffle_v2p0_v2p0__0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -492,8 +483,7 @@ define void @v_shuffle_v2p0_v2p0__1_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -537,8 +527,7 @@ define void @v_shuffle_v2p0_v2p0__2_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -660,8 +649,7 @@ define void @v_shuffle_v2p0_v2p0__1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -794,8 +782,7 @@ define void @v_shuffle_v2p0_v2p0__1_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -900,8 +887,7 @@ define void @v_shuffle_v2p0_v2p0__0_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -956,8 +942,7 @@ define void @v_shuffle_v2p0_v2p0__1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1095,8 +1080,7 @@ define void @s_shuffle_v2p0_v2p0__1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1154,8 +1138,7 @@ define void @s_shuffle_v2p0_v2p0__3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1213,10 +1196,8 @@ define void @s_shuffle_v2p0_v2p0__3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1270,8 +1251,7 @@ define void @s_shuffle_v2p0_v2p0__3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1320,10 +1300,8 @@ define void @s_shuffle_v2p0_v2p0__3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1336,18 +1314,43 @@ define void @s_shuffle_v2p0_v2p0__3_2() {
}
define void @s_shuffle_v2p0_v2p0__3_3() {
-; GFX9-LABEL: s_shuffle_v2p0_v2p0__3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v2p0__3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v2p0__3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v2p0__3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> <i32 3, i32 3>
@@ -1388,8 +1391,7 @@ define void @s_shuffle_v2p0_v2p0__u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1401,18 +1403,43 @@ define void @s_shuffle_v2p0_v2p0__u_0() {
}
define void @s_shuffle_v2p0_v2p0__0_0() {
-; GFX9-LABEL: s_shuffle_v2p0_v2p0__0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v2p0__0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v2p0__0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v2p0__0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -1456,10 +1483,8 @@ define void @s_shuffle_v2p0_v2p0__1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1503,8 +1528,7 @@ define void @s_shuffle_v2p0_v2p0__2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1596,18 +1620,43 @@ define void @s_shuffle_v2p0_v2p0__0_1() {
}
define void @s_shuffle_v2p0_v2p0__1_1() {
-; GFX9-LABEL: s_shuffle_v2p0_v2p0__1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v2p0__1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v2p0__1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v2p0__1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> <i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -1741,8 +1790,7 @@ define void @s_shuffle_v2p0_v2p0__1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1850,8 +1898,7 @@ define void @s_shuffle_v2p0_v2p0__0_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1905,8 +1952,7 @@ define void @s_shuffle_v2p0_v2p0__1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll
index 27a6cf11c4cb1..9c770bf1c77cc 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll
@@ -127,8 +127,7 @@ define void @v_shuffle_v2p0_v3p0__2_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -223,8 +222,7 @@ define void @v_shuffle_v2p0_v3p0__5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -283,10 +281,8 @@ define void @v_shuffle_v2p0_v3p0__5_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -341,8 +337,7 @@ define void @v_shuffle_v2p0_v3p0__5_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -397,8 +392,7 @@ define void @v_shuffle_v2p0_v3p0__5_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -447,10 +441,8 @@ define void @v_shuffle_v2p0_v3p0__5_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -495,8 +487,7 @@ define void @v_shuffle_v2p0_v3p0__5_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -541,8 +532,7 @@ define void @v_shuffle_v2p0_v3p0__5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -587,8 +577,7 @@ define void @v_shuffle_v2p0_v3p0__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -632,8 +621,7 @@ define void @v_shuffle_v2p0_v3p0__0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -677,8 +665,7 @@ define void @v_shuffle_v2p0_v3p0__1_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -726,10 +713,8 @@ define void @v_shuffle_v2p0_v3p0__2_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -773,8 +758,7 @@ define void @v_shuffle_v2p0_v3p0__3_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -828,8 +812,7 @@ define void @v_shuffle_v2p0_v3p0__4_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -952,8 +935,7 @@ define void @v_shuffle_v2p0_v3p0__1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -997,8 +979,7 @@ define void @v_shuffle_v2p0_v3p0__2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1091,8 +1072,7 @@ define void @v_shuffle_v2p0_v3p0__4_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1176,8 +1156,7 @@ define void @v_shuffle_v2p0_v3p0__0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1260,8 +1239,7 @@ define void @v_shuffle_v2p0_v3p0__2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1354,8 +1332,7 @@ define void @v_shuffle_v2p0_v3p0__4_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1489,8 +1466,7 @@ define void @v_shuffle_v2p0_v3p0__2_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1545,8 +1521,7 @@ define void @v_shuffle_v2p0_v3p0__4_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1641,8 +1616,7 @@ define void @v_shuffle_v2p0_v3p0__0_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1697,8 +1671,7 @@ define void @v_shuffle_v2p0_v3p0__1_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1753,8 +1726,7 @@ define void @v_shuffle_v2p0_v3p0__2_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1839,8 +1811,7 @@ define void @v_shuffle_v2p0_v3p0__4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1935,8 +1906,7 @@ define void @v_shuffle_v2p0_v3p0__0_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1991,8 +1961,7 @@ define void @v_shuffle_v2p0_v3p0__1_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2047,8 +2016,7 @@ define void @v_shuffle_v2p0_v3p0__2_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2093,8 +2061,7 @@ define void @v_shuffle_v2p0_v3p0__3_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2232,8 +2199,7 @@ define void @s_shuffle_v2p0_v3p0__1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2273,8 +2239,7 @@ define void @s_shuffle_v2p0_v3p0__2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2332,8 +2297,7 @@ define void @s_shuffle_v2p0_v3p0__4_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2374,8 +2338,7 @@ define void @s_shuffle_v2p0_v3p0__5_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2430,11 +2393,11 @@ define void @s_shuffle_v2p0_v3p0__5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2484,8 +2447,7 @@ define void @s_shuffle_v2p0_v3p0__5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2539,10 +2501,8 @@ define void @s_shuffle_v2p0_v3p0__5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2587,10 +2547,8 @@ define void @s_shuffle_v2p0_v3p0__5_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2603,18 +2561,43 @@ define void @s_shuffle_v2p0_v3p0__5_3() {
}
define void @s_shuffle_v2p0_v3p0__5_4() {
-; GFX9-LABEL: s_shuffle_v2p0_v3p0__5_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v3p0__5_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v3p0__5_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v3p0__5_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> <i32 5, i32 4>
@@ -2659,10 +2642,8 @@ define void @s_shuffle_v2p0_v3p0__5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2707,8 +2688,7 @@ define void @s_shuffle_v2p0_v3p0__u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2720,18 +2700,43 @@ define void @s_shuffle_v2p0_v3p0__u_0() {
}
define void @s_shuffle_v2p0_v3p0__0_0() {
-; GFX9-LABEL: s_shuffle_v2p0_v3p0__0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v3p0__0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v3p0__0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v3p0__0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -2775,10 +2780,8 @@ define void @s_shuffle_v2p0_v3p0__1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2822,10 +2825,8 @@ define void @s_shuffle_v2p0_v3p0__2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2869,8 +2870,7 @@ define void @s_shuffle_v2p0_v3p0__3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2924,13 +2924,11 @@ define void @s_shuffle_v2p0_v3p0__4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3023,18 +3021,43 @@ define void @s_shuffle_v2p0_v3p0__0_1() {
}
define void @s_shuffle_v2p0_v3p0__1_1() {
-; GFX9-LABEL: s_shuffle_v2p0_v3p0__1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v3p0__1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v3p0__1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v3p0__1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> <i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -3042,18 +3065,43 @@ define void @s_shuffle_v2p0_v3p0__1_1() {
}
define void @s_shuffle_v2p0_v3p0__2_1() {
-; GFX9-LABEL: s_shuffle_v2p0_v3p0__2_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v3p0__2_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v3p0__2_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v3p0__2_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> <i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -3142,8 +3190,7 @@ define void @s_shuffle_v2p0_v3p0__4_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3188,8 +3235,7 @@ define void @s_shuffle_v2p0_v3p0__u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3201,18 +3247,43 @@ define void @s_shuffle_v2p0_v3p0__u_2() {
}
define void @s_shuffle_v2p0_v3p0__0_2() {
-; GFX9-LABEL: s_shuffle_v2p0_v3p0__0_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v3p0__0_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v3p0__0_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v3p0__0_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> <i32 0, i32 2>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -3256,10 +3327,8 @@ define void @s_shuffle_v2p0_v3p0__1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3307,10 +3376,8 @@ define void @s_shuffle_v2p0_v3p0__2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3354,8 +3421,7 @@ define void @s_shuffle_v2p0_v3p0__3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3412,10 +3478,8 @@ define void @s_shuffle_v2p0_v3p0__4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3514,8 +3578,7 @@ define void @s_shuffle_v2p0_v3p0__1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3555,8 +3618,7 @@ define void @s_shuffle_v2p0_v3p0__2_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3618,10 +3680,8 @@ define void @s_shuffle_v2p0_v3p0__4_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3716,8 +3776,7 @@ define void @s_shuffle_v2p0_v3p0__0_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3771,8 +3830,7 @@ define void @s_shuffle_v2p0_v3p0__1_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3826,8 +3884,7 @@ define void @s_shuffle_v2p0_v3p0__2_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3881,18 +3938,43 @@ define void @s_shuffle_v2p0_v3p0__3_4() {
}
define void @s_shuffle_v2p0_v3p0__4_4() {
-; GFX9-LABEL: s_shuffle_v2p0_v3p0__4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v3p0__4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v3p0__4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v3p0__4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> <i32 4, i32 4>
@@ -3933,8 +4015,7 @@ define void @s_shuffle_v2p0_v3p0__u_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3988,8 +4069,7 @@ define void @s_shuffle_v2p0_v3p0__0_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4047,10 +4127,8 @@ define void @s_shuffle_v2p0_v3p0__1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4104,10 +4182,8 @@ define void @s_shuffle_v2p0_v3p0__2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4120,18 +4196,43 @@ define void @s_shuffle_v2p0_v3p0__2_5() {
}
define void @s_shuffle_v2p0_v3p0__3_5() {
-; GFX9-LABEL: s_shuffle_v2p0_v3p0__3_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v3p0__3_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v3p0__3_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v3p0__3_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> <i32 3, i32 5>
@@ -4176,10 +4277,8 @@ define void @s_shuffle_v2p0_v3p0__4_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll
index ae31524ebaa7f..47634638d7674 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll
@@ -166,8 +166,7 @@ define void @v_shuffle_v2p0_v4p0__3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -302,8 +301,7 @@ define void @v_shuffle_v2p0_v4p0__7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -362,10 +360,8 @@ define void @v_shuffle_v2p0_v4p0__7_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -420,8 +416,7 @@ define void @v_shuffle_v2p0_v4p0__7_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -476,8 +471,7 @@ define void @v_shuffle_v2p0_v4p0__7_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -532,8 +526,7 @@ define void @v_shuffle_v2p0_v4p0__7_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -582,10 +575,8 @@ define void @v_shuffle_v2p0_v4p0__7_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -630,8 +621,7 @@ define void @v_shuffle_v2p0_v4p0__7_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -676,8 +666,7 @@ define void @v_shuffle_v2p0_v4p0__7_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -722,8 +711,7 @@ define void @v_shuffle_v2p0_v4p0__7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -768,8 +756,7 @@ define void @v_shuffle_v2p0_v4p0__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -813,8 +800,7 @@ define void @v_shuffle_v2p0_v4p0__0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -858,8 +844,7 @@ define void @v_shuffle_v2p0_v4p0__1_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -903,8 +888,7 @@ define void @v_shuffle_v2p0_v4p0__2_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -952,10 +936,8 @@ define void @v_shuffle_v2p0_v4p0__3_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -999,8 +981,7 @@ define void @v_shuffle_v2p0_v4p0__4_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1054,8 +1035,7 @@ define void @v_shuffle_v2p0_v4p0__5_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1110,8 +1090,7 @@ define void @v_shuffle_v2p0_v4p0__6_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1234,8 +1213,7 @@ define void @v_shuffle_v2p0_v4p0__1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1279,8 +1257,7 @@ define void @v_shuffle_v2p0_v4p0__2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1324,8 +1301,7 @@ define void @v_shuffle_v2p0_v4p0__3_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1418,8 +1394,7 @@ define void @v_shuffle_v2p0_v4p0__5_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1474,8 +1449,7 @@ define void @v_shuffle_v2p0_v4p0__6_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1559,8 +1533,7 @@ define void @v_shuffle_v2p0_v4p0__0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1643,8 +1616,7 @@ define void @v_shuffle_v2p0_v4p0__2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1688,8 +1660,7 @@ define void @v_shuffle_v2p0_v4p0__3_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1782,8 +1753,7 @@ define void @v_shuffle_v2p0_v4p0__5_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1838,8 +1808,7 @@ define void @v_shuffle_v2p0_v4p0__6_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v4
-; GFX942-NEXT: v_mov_b32_e32 v13, v5
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1923,8 +1892,7 @@ define void @v_shuffle_v2p0_v4p0__0_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1968,8 +1936,7 @@ define void @v_shuffle_v2p0_v4p0__1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2052,8 +2019,7 @@ define void @v_shuffle_v2p0_v4p0__3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2146,8 +2112,7 @@ define void @v_shuffle_v2p0_v4p0__5_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2202,8 +2167,7 @@ define void @v_shuffle_v2p0_v4p0__6_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v6
-; GFX942-NEXT: v_mov_b32_e32 v15, v7
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2376,8 +2340,7 @@ define void @v_shuffle_v2p0_v4p0__3_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2432,8 +2395,7 @@ define void @v_shuffle_v2p0_v4p0__5_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2478,8 +2440,7 @@ define void @v_shuffle_v2p0_v4p0__6_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2574,8 +2535,7 @@ define void @v_shuffle_v2p0_v4p0__0_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2630,8 +2590,7 @@ define void @v_shuffle_v2p0_v4p0__1_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2686,8 +2645,7 @@ define void @v_shuffle_v2p0_v4p0__2_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2742,8 +2700,7 @@ define void @v_shuffle_v2p0_v4p0__3_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2828,8 +2785,7 @@ define void @v_shuffle_v2p0_v4p0__5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2874,8 +2830,7 @@ define void @v_shuffle_v2p0_v4p0__6_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2970,8 +2925,7 @@ define void @v_shuffle_v2p0_v4p0__0_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3026,8 +2980,7 @@ define void @v_shuffle_v2p0_v4p0__1_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3082,8 +3035,7 @@ define void @v_shuffle_v2p0_v4p0__2_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3138,8 +3090,7 @@ define void @v_shuffle_v2p0_v4p0__3_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v6
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3184,8 +3135,7 @@ define void @v_shuffle_v2p0_v4p0__4_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3270,8 +3220,7 @@ define void @v_shuffle_v2p0_v4p0__6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3366,8 +3315,7 @@ define void @v_shuffle_v2p0_v4p0__0_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3422,8 +3370,7 @@ define void @v_shuffle_v2p0_v4p0__1_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v10
-; GFX942-NEXT: v_mov_b32_e32 v5, v11
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3478,8 +3425,7 @@ define void @v_shuffle_v2p0_v4p0__2_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v12
-; GFX942-NEXT: v_mov_b32_e32 v7, v13
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3534,8 +3480,7 @@ define void @v_shuffle_v2p0_v4p0__3_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3580,8 +3525,7 @@ define void @v_shuffle_v2p0_v4p0__4_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3626,8 +3570,7 @@ define void @v_shuffle_v2p0_v4p0__5_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3765,8 +3708,7 @@ define void @s_shuffle_v2p0_v4p0__1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3850,8 +3792,7 @@ define void @s_shuffle_v2p0_v4p0__3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3909,8 +3850,7 @@ define void @s_shuffle_v2p0_v4p0__5_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3996,8 +3936,7 @@ define void @s_shuffle_v2p0_v4p0__7_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4056,10 +3995,8 @@ define void @s_shuffle_v2p0_v4p0__7_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4113,8 +4050,7 @@ define void @s_shuffle_v2p0_v4p0__7_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4172,10 +4108,8 @@ define void @s_shuffle_v2p0_v4p0__7_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4230,8 +4164,7 @@ define void @s_shuffle_v2p0_v4p0__7_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4280,10 +4213,8 @@ define void @s_shuffle_v2p0_v4p0__7_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4296,18 +4227,43 @@ define void @s_shuffle_v2p0_v4p0__7_4() {
}
define void @s_shuffle_v2p0_v4p0__7_5() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__7_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__7_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__7_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__7_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> <i32 7, i32 5>
@@ -4352,10 +4308,8 @@ define void @s_shuffle_v2p0_v4p0__7_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4368,18 +4322,43 @@ define void @s_shuffle_v2p0_v4p0__7_6() {
}
define void @s_shuffle_v2p0_v4p0__7_7() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> <i32 7, i32 7>
@@ -4420,8 +4399,7 @@ define void @s_shuffle_v2p0_v4p0__u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4433,18 +4411,43 @@ define void @s_shuffle_v2p0_v4p0__u_0() {
}
define void @s_shuffle_v2p0_v4p0__0_0() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -4488,10 +4491,8 @@ define void @s_shuffle_v2p0_v4p0__1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4503,18 +4504,43 @@ define void @s_shuffle_v2p0_v4p0__1_0() {
}
define void @s_shuffle_v2p0_v4p0__2_0() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__2_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s4
-; GFX9-NEXT: s_mov_b32 s11, s5
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__2_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__2_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__2_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> <i32 2, i32 0>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -4558,10 +4584,8 @@ define void @s_shuffle_v2p0_v4p0__3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4605,8 +4629,7 @@ define void @s_shuffle_v2p0_v4p0__4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4664,10 +4687,8 @@ define void @s_shuffle_v2p0_v4p0__5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4722,8 +4743,7 @@ define void @s_shuffle_v2p0_v4p0__6_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4816,18 +4836,43 @@ define void @s_shuffle_v2p0_v4p0__0_1() {
}
define void @s_shuffle_v2p0_v4p0__1_1() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> <i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -4835,18 +4880,43 @@ define void @s_shuffle_v2p0_v4p0__1_1() {
}
define void @s_shuffle_v2p0_v4p0__2_1() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__2_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s6
-; GFX9-NEXT: s_mov_b32 s11, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__2_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__2_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__2_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> <i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -4854,18 +4924,43 @@ define void @s_shuffle_v2p0_v4p0__2_1() {
}
define void @s_shuffle_v2p0_v4p0__3_1() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__3_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__3_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__3_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__3_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> <i32 3, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -4954,8 +5049,7 @@ define void @s_shuffle_v2p0_v4p0__5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5010,8 +5104,7 @@ define void @s_shuffle_v2p0_v4p0__6_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5056,8 +5149,7 @@ define void @s_shuffle_v2p0_v4p0__u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5069,18 +5161,43 @@ define void @s_shuffle_v2p0_v4p0__u_2() {
}
define void @s_shuffle_v2p0_v4p0__0_2() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__0_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__0_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__0_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__0_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> <i32 0, i32 2>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -5124,10 +5241,8 @@ define void @s_shuffle_v2p0_v4p0__1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5139,18 +5254,43 @@ define void @s_shuffle_v2p0_v4p0__1_2() {
}
define void @s_shuffle_v2p0_v4p0__2_2() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> <i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -5194,10 +5334,8 @@ define void @s_shuffle_v2p0_v4p0__3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5241,8 +5379,7 @@ define void @s_shuffle_v2p0_v4p0__4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5299,10 +5436,8 @@ define void @s_shuffle_v2p0_v4p0__5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5357,8 +5492,7 @@ define void @s_shuffle_v2p0_v4p0__6_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5411,18 +5545,43 @@ define void @s_shuffle_v2p0_v4p0__u_3() {
}
define void @s_shuffle_v2p0_v4p0__0_3() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__0_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__0_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__0_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__0_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> <i32 0, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -5430,18 +5589,43 @@ define void @s_shuffle_v2p0_v4p0__0_3() {
}
define void @s_shuffle_v2p0_v4p0__1_3() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__1_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s6
-; GFX9-NEXT: s_mov_b32 s9, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__1_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__1_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__1_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> <i32 1, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -5489,18 +5673,43 @@ define void @s_shuffle_v2p0_v4p0__2_3() {
}
define void @s_shuffle_v2p0_v4p0__3_3() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> <i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -5590,8 +5799,7 @@ define void @s_shuffle_v2p0_v4p0__5_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5646,8 +5854,7 @@ define void @s_shuffle_v2p0_v4p0__6_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5746,8 +5953,7 @@ define void @s_shuffle_v2p0_v4p0__1_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5831,8 +6037,7 @@ define void @s_shuffle_v2p0_v4p0__3_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5894,10 +6099,8 @@ define void @s_shuffle_v2p0_v4p0__5_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5910,18 +6113,43 @@ define void @s_shuffle_v2p0_v4p0__5_4() {
}
define void @s_shuffle_v2p0_v4p0__6_4() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__6_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s4
-; GFX9-NEXT: s_mov_b32 s11, s5
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__6_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__6_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__6_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> <i32 6, i32 4>
@@ -6012,8 +6240,7 @@ define void @s_shuffle_v2p0_v4p0__0_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6067,8 +6294,7 @@ define void @s_shuffle_v2p0_v4p0__1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6123,8 +6349,7 @@ define void @s_shuffle_v2p0_v4p0__2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6178,8 +6403,7 @@ define void @s_shuffle_v2p0_v4p0__3_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6233,18 +6457,43 @@ define void @s_shuffle_v2p0_v4p0__4_5() {
}
define void @s_shuffle_v2p0_v4p0__5_5() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> <i32 5, i32 5>
@@ -6253,18 +6502,43 @@ define void @s_shuffle_v2p0_v4p0__5_5() {
}
define void @s_shuffle_v2p0_v4p0__6_5() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__6_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s6
-; GFX9-NEXT: s_mov_b32 s11, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__6_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__6_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__6_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> <i32 6, i32 5>
@@ -6305,8 +6579,7 @@ define void @s_shuffle_v2p0_v4p0__u_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6360,8 +6633,7 @@ define void @s_shuffle_v2p0_v4p0__0_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6419,10 +6691,8 @@ define void @s_shuffle_v2p0_v4p0__1_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6477,8 +6747,7 @@ define void @s_shuffle_v2p0_v4p0__2_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6536,10 +6805,8 @@ define void @s_shuffle_v2p0_v4p0__3_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6552,18 +6819,43 @@ define void @s_shuffle_v2p0_v4p0__3_6() {
}
define void @s_shuffle_v2p0_v4p0__4_6() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__4_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__4_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__4_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__4_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> <i32 4, i32 6>
@@ -6608,10 +6900,8 @@ define void @s_shuffle_v2p0_v4p0__5_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6624,18 +6914,43 @@ define void @s_shuffle_v2p0_v4p0__5_6() {
}
define void @s_shuffle_v2p0_v4p0__6_6() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__6_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> <i32 6, i32 6>
@@ -6726,8 +7041,7 @@ define void @s_shuffle_v2p0_v4p0__0_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6782,8 +7096,7 @@ define void @s_shuffle_v2p0_v4p0__1_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6838,8 +7151,7 @@ define void @s_shuffle_v2p0_v4p0__2_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6894,8 +7206,7 @@ define void @s_shuffle_v2p0_v4p0__3_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6908,18 +7219,43 @@ define void @s_shuffle_v2p0_v4p0__3_7() {
}
define void @s_shuffle_v2p0_v4p0__4_7() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__4_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__4_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__4_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__4_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> <i32 4, i32 7>
@@ -6928,18 +7264,43 @@ define void @s_shuffle_v2p0_v4p0__4_7() {
}
define void @s_shuffle_v2p0_v4p0__5_7() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__5_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s6
-; GFX9-NEXT: s_mov_b32 s9, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__5_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__5_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__5_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> <i32 5, i32 7>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll
index a15fc3212f474..261257533208b 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll
@@ -2266,8 +2266,7 @@ define void @s_shuffle_v3i64_v2i64__1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2325,8 +2324,7 @@ define void @s_shuffle_v3i64_v2i64__3_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2384,10 +2382,8 @@ define void @s_shuffle_v3i64_v2i64__3_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2441,8 +2437,7 @@ define void @s_shuffle_v3i64_v2i64__3_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2491,10 +2486,8 @@ define void @s_shuffle_v3i64_v2i64__3_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2507,18 +2500,43 @@ define void @s_shuffle_v3i64_v2i64__3_2_u() {
}
define void @s_shuffle_v3i64_v2i64__3_3_u() {
-; GFX9-LABEL: s_shuffle_v3i64_v2i64__3_3_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v2i64__3_3_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v2i64__3_3_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_3_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> <i32 3, i32 3, i32 poison>
@@ -2572,10 +2590,8 @@ define void @s_shuffle_v3i64_v2i64__3_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2633,10 +2649,8 @@ define void @s_shuffle_v3i64_v2i64__3_3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2689,12 +2703,9 @@ define void @s_shuffle_v3i64_v2i64__3_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2707,20 +2718,48 @@ define void @s_shuffle_v3i64_v2i64__3_3_2() {
}
define void @s_shuffle_v3i64_v2i64__3_3_3() {
-; GFX9-LABEL: s_shuffle_v3i64_v2i64__3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v2i64__3_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v2i64__3_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> <i32 3, i32 3, i32 3>
@@ -2765,10 +2804,8 @@ define void @s_shuffle_v3i64_v2i64__u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2780,20 +2817,48 @@ define void @s_shuffle_v3i64_v2i64__u_0_0() {
}
define void @s_shuffle_v3i64_v2i64__0_0_0() {
-; GFX9-LABEL: s_shuffle_v3i64_v2i64__0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v2i64__0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v2i64__0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v2i64__0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -2841,12 +2906,9 @@ define void @s_shuffle_v3i64_v2i64__1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2894,10 +2956,8 @@ define void @s_shuffle_v3i64_v2i64__2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2958,12 +3018,9 @@ define void @s_shuffle_v3i64_v2i64__3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3021,10 +3078,8 @@ define void @s_shuffle_v3i64_v2i64__3_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3086,12 +3141,9 @@ define void @s_shuffle_v3i64_v2i64__3_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3153,12 +3205,9 @@ define void @s_shuffle_v3i64_v2i64__3_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3171,18 +3220,43 @@ define void @s_shuffle_v3i64_v2i64__3_2_0() {
}
define void @s_shuffle_v3i64_v2i64__u_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v2i64__u_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v2i64__u_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v2i64__u_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v2i64__u_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> <i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -3190,18 +3264,43 @@ define void @s_shuffle_v3i64_v2i64__u_1_1() {
}
define void @s_shuffle_v3i64_v2i64__0_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v2i64__0_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v2i64__0_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v2i64__0_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v2i64__0_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> <i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -3209,20 +3308,48 @@ define void @s_shuffle_v3i64_v2i64__0_1_1() {
}
define void @s_shuffle_v3i64_v2i64__1_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v2i64__1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v2i64__1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v2i64__1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v2i64__1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> <i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -3230,18 +3357,43 @@ define void @s_shuffle_v3i64_v2i64__1_1_1() {
}
define void @s_shuffle_v3i64_v2i64__2_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v2i64__2_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v2i64__2_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v2i64__2_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v2i64__2_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> <i32 2, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -3294,10 +3446,8 @@ define void @s_shuffle_v3i64_v2i64__3_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3355,10 +3505,8 @@ define void @s_shuffle_v3i64_v2i64__3_u_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3420,12 +3568,9 @@ define void @s_shuffle_v3i64_v2i64__3_0_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3487,12 +3632,9 @@ define void @s_shuffle_v3i64_v2i64__3_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3591,8 +3733,7 @@ define void @s_shuffle_v3i64_v2i64__1_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3658,12 +3799,9 @@ define void @s_shuffle_v3i64_v2i64__3_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3712,10 +3850,8 @@ define void @s_shuffle_v3i64_v2i64__3_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3773,12 +3909,9 @@ define void @s_shuffle_v3i64_v2i64__3_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3836,10 +3969,8 @@ define void @s_shuffle_v3i64_v2i64__3_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3852,18 +3983,43 @@ define void @s_shuffle_v3i64_v2i64__3_1_2() {
}
define void @s_shuffle_v3i64_v2i64__u_3_3() {
-; GFX9-LABEL: s_shuffle_v3i64_v2i64__u_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v2i64__u_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v2i64__u_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v2i64__u_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> <i32 poison, i32 3, i32 3>
@@ -3917,10 +4073,8 @@ define void @s_shuffle_v3i64_v2i64__0_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3978,10 +4132,8 @@ define void @s_shuffle_v3i64_v2i64__1_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3994,18 +4146,43 @@ define void @s_shuffle_v3i64_v2i64__1_3_3() {
}
define void @s_shuffle_v3i64_v2i64__2_3_3() {
-; GFX9-LABEL: s_shuffle_v3i64_v2i64__2_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v2i64__2_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v2i64__2_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v2i64__2_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> <i32 2, i32 3, i32 3>
@@ -4050,10 +4227,8 @@ define void @s_shuffle_v3i64_v2i64__3_u_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4115,12 +4290,9 @@ define void @s_shuffle_v3i64_v2i64__3_0_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4178,10 +4350,8 @@ define void @s_shuffle_v3i64_v2i64__3_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4234,12 +4404,9 @@ define void @s_shuffle_v3i64_v2i64__3_2_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll
index f15dd7d2772e5..3c546bf8a3130 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll
@@ -4720,8 +4720,7 @@ define void @s_shuffle_v3i64_v3i64__1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4761,8 +4760,7 @@ define void @s_shuffle_v3i64_v3i64__2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4820,8 +4818,7 @@ define void @s_shuffle_v3i64_v3i64__4_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4862,8 +4859,7 @@ define void @s_shuffle_v3i64_v3i64__5_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4918,11 +4914,11 @@ define void @s_shuffle_v3i64_v3i64__5_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4972,8 +4968,7 @@ define void @s_shuffle_v3i64_v3i64__5_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5027,10 +5022,8 @@ define void @s_shuffle_v3i64_v3i64__5_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5075,10 +5068,8 @@ define void @s_shuffle_v3i64_v3i64__5_3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5091,18 +5082,43 @@ define void @s_shuffle_v3i64_v3i64__5_3_u() {
}
define void @s_shuffle_v3i64_v3i64__5_4_u() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_4_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 poison>
@@ -5147,10 +5163,8 @@ define void @s_shuffle_v3i64_v3i64__5_5_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5212,12 +5226,9 @@ define void @s_shuffle_v3i64_v3i64__5_5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5279,12 +5290,9 @@ define void @s_shuffle_v3i64_v3i64__5_5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5342,10 +5350,8 @@ define void @s_shuffle_v3i64_v3i64__5_5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5394,12 +5400,9 @@ define void @s_shuffle_v3i64_v3i64__5_5_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5452,12 +5455,9 @@ define void @s_shuffle_v3i64_v3i64__5_5_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5470,20 +5470,48 @@ define void @s_shuffle_v3i64_v3i64__5_5_4() {
}
define void @s_shuffle_v3i64_v3i64__5_5_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 5, i32 5>
@@ -5528,10 +5556,8 @@ define void @s_shuffle_v3i64_v3i64__u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5543,20 +5569,48 @@ define void @s_shuffle_v3i64_v3i64__u_0_0() {
}
define void @s_shuffle_v3i64_v3i64__0_0_0() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -5604,12 +5658,9 @@ define void @s_shuffle_v3i64_v3i64__1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5657,12 +5708,9 @@ define void @s_shuffle_v3i64_v3i64__2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5710,10 +5758,8 @@ define void @s_shuffle_v3i64_v3i64__3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5771,15 +5817,12 @@ define void @s_shuffle_v3i64_v3i64__4_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5838,13 +5881,11 @@ define void @s_shuffle_v3i64_v3i64__5_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5899,11 +5940,11 @@ define void @s_shuffle_v3i64_v3i64__5_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5962,13 +6003,11 @@ define void @s_shuffle_v3i64_v3i64__5_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6022,12 +6061,9 @@ define void @s_shuffle_v3i64_v3i64__5_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6086,13 +6122,11 @@ define void @s_shuffle_v3i64_v3i64__5_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6150,10 +6184,8 @@ define void @s_shuffle_v3i64_v3i64__5_4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6166,116 +6198,12 @@ define void @s_shuffle_v3i64_v3i64__5_4_0() {
}
define void @s_shuffle_v3i64_v3i64__u_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__u_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 poison, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__0_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__0_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 0, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__1_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__2_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__2_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__3_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 3, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__4_1_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__4_1_1:
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__u_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -6283,17 +6211,12 @@ define void @s_shuffle_v3i64_v3i64__4_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__4_1_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__u_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -6301,40 +6224,30 @@ define void @s_shuffle_v3i64_v3i64__4_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__4_1_1:
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__u_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 1, i32 1>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v3i64__5_1_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_1_1:
+define void @s_shuffle_v3i64_v3i64__0_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__0_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -6342,15 +6255,12 @@ define void @s_shuffle_v3i64_v3i64__5_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_1_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__0_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -6358,60 +6268,309 @@ define void @s_shuffle_v3i64_v3i64__5_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_1_1:
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__0_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v3i64__5_u_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_u_1:
+define void @s_shuffle_v3i64_v3i64__1_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_u_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__2_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__2_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__2_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__2_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__3_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__3_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__3_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__3_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 3, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__4_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__4_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__4_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__4_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__5_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__5_u_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_u_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_u_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -6425,11 +6584,11 @@ define void @s_shuffle_v3i64_v3i64__5_u_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6488,13 +6647,11 @@ define void @s_shuffle_v3i64_v3i64__5_0_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6552,12 +6709,9 @@ define void @s_shuffle_v3i64_v3i64__5_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6577,217 +6731,344 @@ define void @s_shuffle_v3i64_v3i64__5_3_1() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_3_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_3_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__5_4_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__u_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__u_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__u_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__u_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 poison, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__0_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__0_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__0_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__0_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 0, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__1_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__1_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__1_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__1_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__2_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_3_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_3_1:
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 1>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v3i64__5_4_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_1:
+define void @s_shuffle_v3i64_v3i64__3_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_1:
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__u_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__u_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 poison, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__0_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__0_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 0, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__1_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__1_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__2_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__3_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 3, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -6840,10 +7121,8 @@ define void @s_shuffle_v3i64_v3i64__4_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6897,10 +7176,8 @@ define void @s_shuffle_v3i64_v3i64__5_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6950,8 +7227,7 @@ define void @s_shuffle_v3i64_v3i64__5_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7009,12 +7285,9 @@ define void @s_shuffle_v3i64_v3i64__5_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7064,8 +7337,7 @@ define void @s_shuffle_v3i64_v3i64__5_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7119,10 +7391,8 @@ define void @s_shuffle_v3i64_v3i64__5_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7180,10 +7450,8 @@ define void @s_shuffle_v3i64_v3i64__5_4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7282,8 +7550,7 @@ define void @s_shuffle_v3i64_v3i64__1_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7323,8 +7590,7 @@ define void @s_shuffle_v3i64_v3i64__2_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7390,12 +7656,9 @@ define void @s_shuffle_v3i64_v3i64__4_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7444,12 +7707,9 @@ define void @s_shuffle_v3i64_v3i64__5_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7494,10 +7754,8 @@ define void @s_shuffle_v3i64_v3i64__5_u_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7552,13 +7810,11 @@ define void @s_shuffle_v3i64_v3i64__5_0_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7612,10 +7868,8 @@ define void @s_shuffle_v3i64_v3i64__5_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7673,10 +7927,8 @@ define void @s_shuffle_v3i64_v3i64__5_2_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7725,12 +7977,9 @@ define void @s_shuffle_v3i64_v3i64__5_4_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7743,18 +7992,43 @@ define void @s_shuffle_v3i64_v3i64__5_4_3() {
}
define void @s_shuffle_v3i64_v3i64__u_4_4() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__u_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__u_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__u_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__u_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 poison, i32 4, i32 4>
@@ -7808,10 +8082,8 @@ define void @s_shuffle_v3i64_v3i64__0_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7869,10 +8141,8 @@ define void @s_shuffle_v3i64_v3i64__1_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7930,10 +8200,8 @@ define void @s_shuffle_v3i64_v3i64__2_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7946,18 +8214,43 @@ define void @s_shuffle_v3i64_v3i64__2_4_4() {
}
define void @s_shuffle_v3i64_v3i64__3_4_4() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__3_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__3_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__3_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 3, i32 4, i32 4>
@@ -7966,20 +8259,48 @@ define void @s_shuffle_v3i64_v3i64__3_4_4() {
}
define void @s_shuffle_v3i64_v3i64__4_4_4() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 4, i32 4>
@@ -7988,20 +8309,48 @@ define void @s_shuffle_v3i64_v3i64__4_4_4() {
}
define void @s_shuffle_v3i64_v3i64__5_4_4() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
@@ -8042,10 +8391,8 @@ define void @s_shuffle_v3i64_v3i64__5_u_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8104,13 +8451,11 @@ define void @s_shuffle_v3i64_v3i64__5_0_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8164,10 +8509,8 @@ define void @s_shuffle_v3i64_v3i64__5_1_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8225,12 +8568,9 @@ define void @s_shuffle_v3i64_v3i64__5_2_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8279,12 +8619,9 @@ define void @s_shuffle_v3i64_v3i64__5_3_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8297,18 +8634,43 @@ define void @s_shuffle_v3i64_v3i64__5_3_4() {
}
define void @s_shuffle_v3i64_v3i64__u_5_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__u_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__u_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__u_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__u_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 poison, i32 5, i32 5>
@@ -8362,10 +8724,8 @@ define void @s_shuffle_v3i64_v3i64__0_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8423,10 +8783,8 @@ define void @s_shuffle_v3i64_v3i64__1_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8484,10 +8842,8 @@ define void @s_shuffle_v3i64_v3i64__2_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8500,18 +8856,43 @@ define void @s_shuffle_v3i64_v3i64__2_5_5() {
}
define void @s_shuffle_v3i64_v3i64__3_5_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__3_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__3_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__3_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 3, i32 5, i32 5>
@@ -8520,20 +8901,48 @@ define void @s_shuffle_v3i64_v3i64__3_5_5() {
}
define void @s_shuffle_v3i64_v3i64__4_5_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__4_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__4_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__4_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__4_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 5, i32 5>
@@ -8542,18 +8951,43 @@ define void @s_shuffle_v3i64_v3i64__4_5_5() {
}
define void @s_shuffle_v3i64_v3i64__5_u_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_u_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_u_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_u_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_u_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 poison, i32 5>
@@ -8607,10 +9041,8 @@ define void @s_shuffle_v3i64_v3i64__5_0_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8668,10 +9100,8 @@ define void @s_shuffle_v3i64_v3i64__5_1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8729,10 +9159,8 @@ define void @s_shuffle_v3i64_v3i64__5_2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8785,12 +9213,9 @@ define void @s_shuffle_v3i64_v3i64__5_3_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8803,18 +9228,43 @@ define void @s_shuffle_v3i64_v3i64__5_3_5() {
}
define void @s_shuffle_v3i64_v3i64__5_4_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_4_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 5>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll
index 6e156d2d4a2f5..7815761d29696 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll
@@ -8016,8 +8016,7 @@ define void @s_shuffle_v3i64_v4i64__1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8057,8 +8056,7 @@ define void @s_shuffle_v3i64_v4i64__2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8102,8 +8100,7 @@ define void @s_shuffle_v3i64_v4i64__3_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8161,8 +8158,7 @@ define void @s_shuffle_v3i64_v4i64__5_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8203,8 +8199,7 @@ define void @s_shuffle_v3i64_v4i64__6_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8249,8 +8244,7 @@ define void @s_shuffle_v3i64_v4i64__7_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8309,10 +8303,8 @@ define void @s_shuffle_v3i64_v4i64__7_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8366,8 +8358,7 @@ define void @s_shuffle_v3i64_v4i64__7_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8425,10 +8416,8 @@ define void @s_shuffle_v3i64_v4i64__7_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8482,10 +8471,8 @@ define void @s_shuffle_v3i64_v4i64__7_3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8534,10 +8521,8 @@ define void @s_shuffle_v3i64_v4i64__7_4_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8550,18 +8535,43 @@ define void @s_shuffle_v3i64_v4i64__7_4_u() {
}
define void @s_shuffle_v3i64_v4i64__7_5_u() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_5_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 poison>
@@ -8606,10 +8616,8 @@ define void @s_shuffle_v3i64_v4i64__7_6_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8654,10 +8662,8 @@ define void @s_shuffle_v3i64_v4i64__7_7_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8716,13 +8722,11 @@ define void @s_shuffle_v3i64_v4i64__7_7_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8781,13 +8785,11 @@ define void @s_shuffle_v3i64_v4i64__7_7_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8841,10 +8843,8 @@ define void @s_shuffle_v3i64_v4i64__7_7_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8902,12 +8902,9 @@ define void @s_shuffle_v3i64_v4i64__7_7_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8956,12 +8953,9 @@ define void @s_shuffle_v3i64_v4i64__7_7_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9010,12 +9004,9 @@ define void @s_shuffle_v3i64_v4i64__7_7_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9028,20 +9019,48 @@ define void @s_shuffle_v3i64_v4i64__7_7_5() {
}
define void @s_shuffle_v3i64_v4i64__7_7_6() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_7_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_7_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_7_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_7_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 7, i32 6>
@@ -9090,12 +9109,9 @@ define void @s_shuffle_v3i64_v4i64__7_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9144,10 +9160,8 @@ define void @s_shuffle_v3i64_v4i64__u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9159,20 +9173,48 @@ define void @s_shuffle_v3i64_v4i64__u_0_0() {
}
define void @s_shuffle_v3i64_v4i64__0_0_0() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -9220,12 +9262,9 @@ define void @s_shuffle_v3i64_v4i64__1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9273,12 +9312,9 @@ define void @s_shuffle_v3i64_v4i64__2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9330,12 +9366,9 @@ define void @s_shuffle_v3i64_v4i64__3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9383,10 +9416,8 @@ define void @s_shuffle_v3i64_v4i64__4_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9444,15 +9475,12 @@ define void @s_shuffle_v3i64_v4i64__5_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9511,13 +9539,11 @@ define void @s_shuffle_v3i64_v4i64__6_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9576,15 +9602,12 @@ define void @s_shuffle_v3i64_v4i64__7_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9639,13 +9662,11 @@ define void @s_shuffle_v3i64_v4i64__7_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9704,15 +9725,12 @@ define void @s_shuffle_v3i64_v4i64__7_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9770,12 +9788,9 @@ define void @s_shuffle_v3i64_v4i64__7_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9833,12 +9848,9 @@ define void @s_shuffle_v3i64_v4i64__7_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9897,15 +9909,12 @@ define void @s_shuffle_v3i64_v4i64__7_4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9963,10 +9972,8 @@ define void @s_shuffle_v3i64_v4i64__7_5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -10028,12 +10035,9 @@ define void @s_shuffle_v3i64_v4i64__7_6_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -10046,137 +10050,12 @@ define void @s_shuffle_v3i64_v4i64__7_6_0() {
}
define void @s_shuffle_v3i64_v4i64__u_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__0_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__1_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__2_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__2_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__3_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__3_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__4_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__5_1_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_1_1:
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -10184,17 +10063,12 @@ define void @s_shuffle_v3i64_v4i64__5_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_1_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -10202,42 +10076,30 @@ define void @s_shuffle_v3i64_v4i64__5_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_1_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__6_1_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_1_1:
+define void @s_shuffle_v3i64_v4i64__0_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -10245,17 +10107,12 @@ define void @s_shuffle_v3i64_v4i64__6_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_1_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -10263,42 +10120,32 @@ define void @s_shuffle_v3i64_v4i64__6_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_1_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_1_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_1:
+define void @s_shuffle_v3i64_v4i64__1_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -10306,17 +10153,14 @@ define void @s_shuffle_v3i64_v4i64__7_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -10324,200 +10168,204 @@ define void @s_shuffle_v3i64_v4i64__7_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_u_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_1:
+define void @s_shuffle_v3i64_v4i64__2_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_0_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_1:
+define void @s_shuffle_v3i64_v4i64__3_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__4_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_2_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_1:
+define void @s_shuffle_v3i64_v4i64__5_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -10526,61 +10374,57 @@ define void @s_shuffle_v3i64_v4i64__7_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_3_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_1:
+define void @s_shuffle_v3i64_v4i64__6_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -10589,92 +10433,80 @@ define void @s_shuffle_v3i64_v4i64__7_3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_4_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_1:
+define void @s_shuffle_v3i64_v4i64__7_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_5_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_1:
+define void @s_shuffle_v3i64_v4i64__7_u_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -10692,7 +10524,7 @@ define void @s_shuffle_v3i64_v4i64__7_5_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -10710,32 +10542,30 @@ define void @s_shuffle_v3i64_v4i64__7_5_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_6_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_1:
+define void @s_shuffle_v3i64_v4i64__7_0_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -10746,8 +10576,8 @@ define void @s_shuffle_v3i64_v4i64__7_6_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -10755,7 +10585,7 @@ define void @s_shuffle_v3i64_v4i64__7_6_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -10766,8 +10596,8 @@ define void @s_shuffle_v3i64_v4i64__7_6_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -10775,190 +10605,131 @@ define void @s_shuffle_v3i64_v4i64__7_6_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__u_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__0_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__1_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__1_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__2_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__3_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__3_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__4_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v3i64_v4i64__7_2_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 2, i32 2>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__5_2_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_2_2:
+define void @s_shuffle_v3i64_v4i64__7_3_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_2_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_2_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -10967,295 +10738,1418 @@ define void @s_shuffle_v3i64_v4i64__5_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__6_2_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_2_2:
+define void @s_shuffle_v3i64_v4i64__7_4_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_2_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_2_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_2_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_2:
+define void @s_shuffle_v3i64_v4i64__7_5_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_6_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__u_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__0_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__1_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__2_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__3_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__4_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__5_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__6_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_u_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_0_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_1_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_3_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_4_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_5_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_6_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__u_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__0_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__1_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__2_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_u_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_2:
+define void @s_shuffle_v3i64_v4i64__3_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_0_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_2:
+define void @s_shuffle_v3i64_v4i64__4_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_1_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_2:
+define void @s_shuffle_v3i64_v4i64__5_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11264,21 +12158,22 @@ define void @s_shuffle_v3i64_v4i64__7_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_3_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_2:
+define void @s_shuffle_v3i64_v4i64__6_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -11287,16 +12182,16 @@ define void @s_shuffle_v3i64_v4i64__7_3_2() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s14
; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -11305,16 +12200,16 @@ define void @s_shuffle_v3i64_v4i64__7_3_2() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s14
; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11323,23 +12218,22 @@ define void @s_shuffle_v3i64_v4i64__7_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_4_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_2:
+define void @s_shuffle_v3i64_v4i64__7_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -11350,14 +12244,16 @@ define void @s_shuffle_v3i64_v4i64__7_4_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -11368,14 +12264,16 @@ define void @s_shuffle_v3i64_v4i64__7_4_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11384,59 +12282,58 @@ define void @s_shuffle_v3i64_v4i64__7_4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_5_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_2:
+define void @s_shuffle_v3i64_v4i64__7_u_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11445,59 +12342,61 @@ define void @s_shuffle_v3i64_v4i64__7_5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_6_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_2:
+define void @s_shuffle_v3i64_v4i64__7_0_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11506,30 +12405,32 @@ define void @s_shuffle_v3i64_v4i64__7_6_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__u_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_3_3:
+define void @s_shuffle_v3i64_v4i64__7_1_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -11537,14 +12438,17 @@ define void @s_shuffle_v3i64_v4i64__u_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -11552,58 +12456,42 @@ define void @s_shuffle_v3i64_v4i64__u_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__0_3_3() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 3, i32 3>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__1_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_3_3:
+define void @s_shuffle_v3i64_v4i64__7_2_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -11611,16 +12499,19 @@ define void @s_shuffle_v3i64_v4i64__1_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -11628,39 +12519,43 @@ define void @s_shuffle_v3i64_v4i64__1_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 3, i32 3>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__2_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_3_3:
+define void @s_shuffle_v3i64_v4i64__7_4_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -11668,16 +12563,19 @@ define void @s_shuffle_v3i64_v4i64__2_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -11685,94 +12583,102 @@ define void @s_shuffle_v3i64_v4i64__2_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 3, i32 3>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__3_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_3_3:
+define void @s_shuffle_v3i64_v4i64__7_5_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 3, i32 3>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__4_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_3_3:
+define void @s_shuffle_v3i64_v4i64__7_6_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -11780,14 +12686,19 @@ define void @s_shuffle_v3i64_v4i64__4_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -11795,649 +12706,531 @@ define void @s_shuffle_v3i64_v4i64__4_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 3, i32 3>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__5_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_3_3:
+define void @s_shuffle_v3i64_v4i64__u_4_4() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__0_4_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__6_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_3_3:
+define void @s_shuffle_v3i64_v4i64__1_4_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_3:
+define void @s_shuffle_v3i64_v4i64__2_4_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_u_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_3:
+define void @s_shuffle_v3i64_v4i64__3_4_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_0_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_3:
+define void @s_shuffle_v3i64_v4i64__4_4_4() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__5_4_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_1_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_3:
+define void @s_shuffle_v3i64_v4i64__6_4_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_2_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_3:
+define void @s_shuffle_v3i64_v4i64__7_4_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_4_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_3:
+define void @s_shuffle_v3i64_v4i64__7_u_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_5_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_3:
+define void @s_shuffle_v3i64_v4i64__7_0_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_6_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_3:
+define void @s_shuffle_v3i64_v4i64__7_1_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -12446,831 +13239,783 @@ define void @s_shuffle_v3i64_v4i64__7_6_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__u_4_4() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__0_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_4_4:
+define void @s_shuffle_v3i64_v4i64__7_2_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__1_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_4_4:
+define void @s_shuffle_v3i64_v4i64__7_3_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__2_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_4_4:
+define void @s_shuffle_v3i64_v4i64__7_5_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__3_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_4_4:
+define void @s_shuffle_v3i64_v4i64__7_6_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 4, i32 4>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__4_4_4() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__5_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_4_4:
+define void @s_shuffle_v3i64_v4i64__u_5_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 poison, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__6_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_4_4:
+define void @s_shuffle_v3i64_v4i64__0_5_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 0, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_4:
+define void @s_shuffle_v3i64_v4i64__1_5_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 1, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_u_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_4:
+define void @s_shuffle_v3i64_v4i64__2_5_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 2, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_0_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_4:
+define void @s_shuffle_v3i64_v4i64__3_5_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 3, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_1_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_4:
+define void @s_shuffle_v3i64_v4i64__4_5_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 4, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_2_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_4:
+define void @s_shuffle_v3i64_v4i64__5_5_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_3_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_4:
+define void @s_shuffle_v3i64_v4i64__6_5_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_5_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_4:
+define void @s_shuffle_v3i64_v4i64__7_5_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_6_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_4:
+define void @s_shuffle_v3i64_v4i64__7_u_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 4>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__u_5_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 poison, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__0_5_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_5_5:
+define void @s_shuffle_v3i64_v4i64__7_0_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -13278,17 +14023,19 @@ define void @s_shuffle_v3i64_v4i64__0_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_5_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -13296,68 +14043,68 @@ define void @s_shuffle_v3i64_v4i64__0_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_5_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 0, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__1_5_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_5_5:
+define void @s_shuffle_v3i64_v4i64__7_1_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_5_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_5_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -13366,231 +14113,145 @@ define void @s_shuffle_v3i64_v4i64__1_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 1, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__2_5_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_5_5:
+define void @s_shuffle_v3i64_v4i64__7_2_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_5_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_5_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 2, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__3_5_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_5_5:
+define void @s_shuffle_v3i64_v4i64__7_3_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_5_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_5_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 3, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__4_5_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 4, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__5_5_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__6_5_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__6_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__7_5_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_u_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_5:
+define void @s_shuffle_v3i64_v4i64__7_4_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -13598,6 +14259,8 @@ define void @s_shuffle_v3i64_v4i64__7_u_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -13605,7 +14268,7 @@ define void @s_shuffle_v3i64_v4i64__7_u_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -13613,6 +14276,8 @@ define void @s_shuffle_v3i64_v4i64__7_u_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -13620,41 +14285,37 @@ define void @s_shuffle_v3i64_v4i64__7_u_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_0_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_5:
+define void @s_shuffle_v3i64_v4i64__7_6_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -13662,19 +14323,16 @@ define void @s_shuffle_v3i64_v4i64__7_0_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -13682,35 +14340,73 @@ define void @s_shuffle_v3i64_v4i64__7_0_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__u_6_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 poison, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_1_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_5:
+define void @s_shuffle_v3i64_v4i64__0_6_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -13719,16 +14415,16 @@ define void @s_shuffle_v3i64_v4i64__7_1_5() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -13737,16 +14433,16 @@ define void @s_shuffle_v3i64_v4i64__7_1_5() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -13755,360 +14451,300 @@ define void @s_shuffle_v3i64_v4i64__7_1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 0, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_2_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_5:
+define void @s_shuffle_v3i64_v4i64__1_6_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 1, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_3_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_5:
+define void @s_shuffle_v3i64_v4i64__2_6_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 2, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_4_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_5:
+define void @s_shuffle_v3i64_v4i64__3_6_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 3, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_6_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_5:
+define void @s_shuffle_v3i64_v4i64__4_6_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__u_6_6() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 poison, i32 6, i32 6>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 4, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__0_6_6() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_6_6:
+define void @s_shuffle_v3i64_v4i64__5_6_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_6_6:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_6_6:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 0, i32 6, i32 6>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__1_6_6() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_6_6:
+define void @s_shuffle_v3i64_v4i64__6_6_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -14116,17 +14752,14 @@ define void @s_shuffle_v3i64_v4i64__1_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_6_6:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -14134,42 +14767,34 @@ define void @s_shuffle_v3i64_v4i64__1_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_6_6:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 1, i32 6, i32 6>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__2_6_6() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_6_6:
+define void @s_shuffle_v3i64_v4i64__7_6_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_6:
; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -14177,17 +14802,14 @@ define void @s_shuffle_v3i64_v4i64__2_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_6_6:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -14195,190 +14817,63 @@ define void @s_shuffle_v3i64_v4i64__2_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_6_6:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 2, i32 6, i32 6>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__3_6_6() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_6_6:
+define void @s_shuffle_v3i64_v4i64__7_u_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_6_6:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_6_6:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 3, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__4_6_6() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 4, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__5_6_6() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__5_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__6_6_6() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__7_6_6() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__7_u_6() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_u_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 6>
@@ -14432,10 +14927,8 @@ define void @s_shuffle_v3i64_v4i64__7_0_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14493,10 +14986,8 @@ define void @s_shuffle_v3i64_v4i64__7_1_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14554,10 +15045,8 @@ define void @s_shuffle_v3i64_v4i64__7_2_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14615,10 +15104,8 @@ define void @s_shuffle_v3i64_v4i64__7_3_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14671,12 +15158,9 @@ define void @s_shuffle_v3i64_v4i64__7_4_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14689,18 +15173,43 @@ define void @s_shuffle_v3i64_v4i64__7_4_6() {
}
define void @s_shuffle_v3i64_v4i64__7_5_6() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_5_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 6>
@@ -14745,10 +15254,8 @@ define void @s_shuffle_v3i64_v4i64__u_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14806,10 +15313,8 @@ define void @s_shuffle_v3i64_v4i64__0_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14871,12 +15376,9 @@ define void @s_shuffle_v3i64_v4i64__1_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14934,12 +15436,9 @@ define void @s_shuffle_v3i64_v4i64__2_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15001,12 +15500,9 @@ define void @s_shuffle_v3i64_v4i64__3_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15019,20 +15515,48 @@ define void @s_shuffle_v3i64_v4i64__3_7_7() {
}
define void @s_shuffle_v3i64_v4i64__4_7_7() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 4, i32 7, i32 7>
@@ -15081,12 +15605,9 @@ define void @s_shuffle_v3i64_v4i64__5_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15139,12 +15660,9 @@ define void @s_shuffle_v3i64_v4i64__6_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15193,10 +15711,8 @@ define void @s_shuffle_v3i64_v4i64__7_u_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15258,12 +15774,9 @@ define void @s_shuffle_v3i64_v4i64__7_0_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15321,10 +15834,8 @@ define void @s_shuffle_v3i64_v4i64__7_1_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15386,12 +15897,9 @@ define void @s_shuffle_v3i64_v4i64__7_2_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15449,12 +15957,9 @@ define void @s_shuffle_v3i64_v4i64__7_3_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15507,12 +16012,9 @@ define void @s_shuffle_v3i64_v4i64__7_4_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15525,20 +16027,48 @@ define void @s_shuffle_v3i64_v4i64__7_4_7() {
}
define void @s_shuffle_v3i64_v4i64__7_5_7() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_5_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 7>
@@ -15587,12 +16117,9 @@ define void @s_shuffle_v3i64_v4i64__7_6_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll
index fe132493ce536..fdba1e81224d0 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll
@@ -2266,8 +2266,7 @@ define void @s_shuffle_v3p0_v2p0__1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2325,8 +2324,7 @@ define void @s_shuffle_v3p0_v2p0__3_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2384,10 +2382,8 @@ define void @s_shuffle_v3p0_v2p0__3_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2441,8 +2437,7 @@ define void @s_shuffle_v3p0_v2p0__3_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2491,10 +2486,8 @@ define void @s_shuffle_v3p0_v2p0__3_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2507,18 +2500,43 @@ define void @s_shuffle_v3p0_v2p0__3_2_u() {
}
define void @s_shuffle_v3p0_v2p0__3_3_u() {
-; GFX9-LABEL: s_shuffle_v3p0_v2p0__3_3_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v2p0__3_3_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v2p0__3_3_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_3_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> <i32 3, i32 3, i32 poison>
@@ -2572,10 +2590,8 @@ define void @s_shuffle_v3p0_v2p0__3_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2633,10 +2649,8 @@ define void @s_shuffle_v3p0_v2p0__3_3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2689,12 +2703,9 @@ define void @s_shuffle_v3p0_v2p0__3_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2707,20 +2718,48 @@ define void @s_shuffle_v3p0_v2p0__3_3_2() {
}
define void @s_shuffle_v3p0_v2p0__3_3_3() {
-; GFX9-LABEL: s_shuffle_v3p0_v2p0__3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v2p0__3_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v2p0__3_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> <i32 3, i32 3, i32 3>
@@ -2765,10 +2804,8 @@ define void @s_shuffle_v3p0_v2p0__u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2780,20 +2817,48 @@ define void @s_shuffle_v3p0_v2p0__u_0_0() {
}
define void @s_shuffle_v3p0_v2p0__0_0_0() {
-; GFX9-LABEL: s_shuffle_v3p0_v2p0__0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v2p0__0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v2p0__0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v2p0__0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -2841,12 +2906,9 @@ define void @s_shuffle_v3p0_v2p0__1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2894,10 +2956,8 @@ define void @s_shuffle_v3p0_v2p0__2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2958,12 +3018,9 @@ define void @s_shuffle_v3p0_v2p0__3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3021,10 +3078,8 @@ define void @s_shuffle_v3p0_v2p0__3_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3086,12 +3141,9 @@ define void @s_shuffle_v3p0_v2p0__3_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3153,12 +3205,9 @@ define void @s_shuffle_v3p0_v2p0__3_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3171,18 +3220,43 @@ define void @s_shuffle_v3p0_v2p0__3_2_0() {
}
define void @s_shuffle_v3p0_v2p0__u_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v2p0__u_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v2p0__u_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v2p0__u_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v2p0__u_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> <i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -3190,18 +3264,43 @@ define void @s_shuffle_v3p0_v2p0__u_1_1() {
}
define void @s_shuffle_v3p0_v2p0__0_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v2p0__0_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v2p0__0_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v2p0__0_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v2p0__0_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> <i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -3209,20 +3308,48 @@ define void @s_shuffle_v3p0_v2p0__0_1_1() {
}
define void @s_shuffle_v3p0_v2p0__1_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v2p0__1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v2p0__1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v2p0__1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v2p0__1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> <i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -3230,18 +3357,43 @@ define void @s_shuffle_v3p0_v2p0__1_1_1() {
}
define void @s_shuffle_v3p0_v2p0__2_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v2p0__2_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v2p0__2_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v2p0__2_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v2p0__2_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> <i32 2, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -3294,10 +3446,8 @@ define void @s_shuffle_v3p0_v2p0__3_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3355,10 +3505,8 @@ define void @s_shuffle_v3p0_v2p0__3_u_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3420,12 +3568,9 @@ define void @s_shuffle_v3p0_v2p0__3_0_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3487,12 +3632,9 @@ define void @s_shuffle_v3p0_v2p0__3_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3591,8 +3733,7 @@ define void @s_shuffle_v3p0_v2p0__1_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3658,12 +3799,9 @@ define void @s_shuffle_v3p0_v2p0__3_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3712,10 +3850,8 @@ define void @s_shuffle_v3p0_v2p0__3_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3773,12 +3909,9 @@ define void @s_shuffle_v3p0_v2p0__3_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3836,10 +3969,8 @@ define void @s_shuffle_v3p0_v2p0__3_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3852,18 +3983,43 @@ define void @s_shuffle_v3p0_v2p0__3_1_2() {
}
define void @s_shuffle_v3p0_v2p0__u_3_3() {
-; GFX9-LABEL: s_shuffle_v3p0_v2p0__u_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v2p0__u_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v2p0__u_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v2p0__u_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> <i32 poison, i32 3, i32 3>
@@ -3917,10 +4073,8 @@ define void @s_shuffle_v3p0_v2p0__0_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3978,10 +4132,8 @@ define void @s_shuffle_v3p0_v2p0__1_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3994,18 +4146,43 @@ define void @s_shuffle_v3p0_v2p0__1_3_3() {
}
define void @s_shuffle_v3p0_v2p0__2_3_3() {
-; GFX9-LABEL: s_shuffle_v3p0_v2p0__2_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v2p0__2_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v2p0__2_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v2p0__2_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> <i32 2, i32 3, i32 3>
@@ -4050,10 +4227,8 @@ define void @s_shuffle_v3p0_v2p0__3_u_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4115,12 +4290,9 @@ define void @s_shuffle_v3p0_v2p0__3_0_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4178,10 +4350,8 @@ define void @s_shuffle_v3p0_v2p0__3_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4234,12 +4404,9 @@ define void @s_shuffle_v3p0_v2p0__3_2_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll
index b6f4e3091b61f..422bcb5b4414a 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll
@@ -4720,8 +4720,7 @@ define void @s_shuffle_v3p0_v3p0__1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4761,8 +4760,7 @@ define void @s_shuffle_v3p0_v3p0__2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4820,8 +4818,7 @@ define void @s_shuffle_v3p0_v3p0__4_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4862,8 +4859,7 @@ define void @s_shuffle_v3p0_v3p0__5_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4918,11 +4914,11 @@ define void @s_shuffle_v3p0_v3p0__5_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4972,8 +4968,7 @@ define void @s_shuffle_v3p0_v3p0__5_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5027,10 +5022,8 @@ define void @s_shuffle_v3p0_v3p0__5_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5075,10 +5068,8 @@ define void @s_shuffle_v3p0_v3p0__5_3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5091,18 +5082,43 @@ define void @s_shuffle_v3p0_v3p0__5_3_u() {
}
define void @s_shuffle_v3p0_v3p0__5_4_u() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_4_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_4_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_4_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 poison>
@@ -5147,10 +5163,8 @@ define void @s_shuffle_v3p0_v3p0__5_5_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5212,12 +5226,9 @@ define void @s_shuffle_v3p0_v3p0__5_5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5279,12 +5290,9 @@ define void @s_shuffle_v3p0_v3p0__5_5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5342,10 +5350,8 @@ define void @s_shuffle_v3p0_v3p0__5_5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5394,12 +5400,9 @@ define void @s_shuffle_v3p0_v3p0__5_5_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5452,12 +5455,9 @@ define void @s_shuffle_v3p0_v3p0__5_5_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5470,20 +5470,48 @@ define void @s_shuffle_v3p0_v3p0__5_5_4() {
}
define void @s_shuffle_v3p0_v3p0__5_5_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 5, i32 5>
@@ -5528,10 +5556,8 @@ define void @s_shuffle_v3p0_v3p0__u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5543,20 +5569,48 @@ define void @s_shuffle_v3p0_v3p0__u_0_0() {
}
define void @s_shuffle_v3p0_v3p0__0_0_0() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -5604,12 +5658,9 @@ define void @s_shuffle_v3p0_v3p0__1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5657,12 +5708,9 @@ define void @s_shuffle_v3p0_v3p0__2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5710,10 +5758,8 @@ define void @s_shuffle_v3p0_v3p0__3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5771,15 +5817,12 @@ define void @s_shuffle_v3p0_v3p0__4_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5838,13 +5881,11 @@ define void @s_shuffle_v3p0_v3p0__5_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5899,11 +5940,11 @@ define void @s_shuffle_v3p0_v3p0__5_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5962,13 +6003,11 @@ define void @s_shuffle_v3p0_v3p0__5_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6022,12 +6061,9 @@ define void @s_shuffle_v3p0_v3p0__5_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6086,13 +6122,11 @@ define void @s_shuffle_v3p0_v3p0__5_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6150,10 +6184,8 @@ define void @s_shuffle_v3p0_v3p0__5_4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6166,116 +6198,12 @@ define void @s_shuffle_v3p0_v3p0__5_4_0() {
}
define void @s_shuffle_v3p0_v3p0__u_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__u_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 poison, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__0_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__0_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 0, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__1_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__2_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__2_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 2, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__3_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__3_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 3, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__4_1_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__4_1_1:
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__u_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -6283,17 +6211,12 @@ define void @s_shuffle_v3p0_v3p0__4_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__4_1_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__u_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -6301,40 +6224,30 @@ define void @s_shuffle_v3p0_v3p0__4_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__4_1_1:
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__u_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 4, i32 1, i32 1>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v3p0__5_1_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_1_1:
+define void @s_shuffle_v3p0_v3p0__0_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__0_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -6342,15 +6255,12 @@ define void @s_shuffle_v3p0_v3p0__5_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_1_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__0_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -6358,60 +6268,309 @@ define void @s_shuffle_v3p0_v3p0__5_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_1_1:
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__0_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v3p0__5_u_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_u_1:
+define void @s_shuffle_v3p0_v3p0__1_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_u_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__2_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__2_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__2_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__2_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 2, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__3_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__3_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__3_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__3_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 3, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__4_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__4_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__4_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__4_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 4, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__5_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__5_u_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_u_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_u_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -6425,11 +6584,11 @@ define void @s_shuffle_v3p0_v3p0__5_u_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6488,13 +6647,11 @@ define void @s_shuffle_v3p0_v3p0__5_0_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6552,12 +6709,9 @@ define void @s_shuffle_v3p0_v3p0__5_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6577,217 +6731,344 @@ define void @s_shuffle_v3p0_v3p0__5_3_1() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_3_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_3_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 3, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__5_4_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_4_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_4_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__u_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__u_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__u_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__u_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 poison, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__0_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__0_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__0_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__0_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 0, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__1_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__1_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__1_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__1_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 1, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__2_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_3_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_3_1:
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 3, i32 1>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v3p0__5_4_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_4_1:
+define void @s_shuffle_v3p0_v3p0__3_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_4_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_1:
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__u_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__u_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 poison, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__0_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__0_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 0, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__1_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__1_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 1, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__2_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__3_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__3_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 3, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -6840,10 +7121,8 @@ define void @s_shuffle_v3p0_v3p0__4_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6897,10 +7176,8 @@ define void @s_shuffle_v3p0_v3p0__5_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6950,8 +7227,7 @@ define void @s_shuffle_v3p0_v3p0__5_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7009,12 +7285,9 @@ define void @s_shuffle_v3p0_v3p0__5_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7064,8 +7337,7 @@ define void @s_shuffle_v3p0_v3p0__5_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7119,10 +7391,8 @@ define void @s_shuffle_v3p0_v3p0__5_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7180,10 +7450,8 @@ define void @s_shuffle_v3p0_v3p0__5_4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7282,8 +7550,7 @@ define void @s_shuffle_v3p0_v3p0__1_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7323,8 +7590,7 @@ define void @s_shuffle_v3p0_v3p0__2_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7390,12 +7656,9 @@ define void @s_shuffle_v3p0_v3p0__4_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7444,12 +7707,9 @@ define void @s_shuffle_v3p0_v3p0__5_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7494,10 +7754,8 @@ define void @s_shuffle_v3p0_v3p0__5_u_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7552,13 +7810,11 @@ define void @s_shuffle_v3p0_v3p0__5_0_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7612,10 +7868,8 @@ define void @s_shuffle_v3p0_v3p0__5_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7673,10 +7927,8 @@ define void @s_shuffle_v3p0_v3p0__5_2_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7725,12 +7977,9 @@ define void @s_shuffle_v3p0_v3p0__5_4_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7743,18 +7992,43 @@ define void @s_shuffle_v3p0_v3p0__5_4_3() {
}
define void @s_shuffle_v3p0_v3p0__u_4_4() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__u_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__u_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__u_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__u_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 poison, i32 4, i32 4>
@@ -7808,10 +8082,8 @@ define void @s_shuffle_v3p0_v3p0__0_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7869,10 +8141,8 @@ define void @s_shuffle_v3p0_v3p0__1_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7930,10 +8200,8 @@ define void @s_shuffle_v3p0_v3p0__2_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7946,18 +8214,43 @@ define void @s_shuffle_v3p0_v3p0__2_4_4() {
}
define void @s_shuffle_v3p0_v3p0__3_4_4() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__3_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__3_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__3_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__3_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 3, i32 4, i32 4>
@@ -7966,20 +8259,48 @@ define void @s_shuffle_v3p0_v3p0__3_4_4() {
}
define void @s_shuffle_v3p0_v3p0__4_4_4() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 4, i32 4, i32 4>
@@ -7988,20 +8309,48 @@ define void @s_shuffle_v3p0_v3p0__4_4_4() {
}
define void @s_shuffle_v3p0_v3p0__5_4_4() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
@@ -8042,10 +8391,8 @@ define void @s_shuffle_v3p0_v3p0__5_u_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8104,13 +8451,11 @@ define void @s_shuffle_v3p0_v3p0__5_0_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8164,10 +8509,8 @@ define void @s_shuffle_v3p0_v3p0__5_1_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8225,12 +8568,9 @@ define void @s_shuffle_v3p0_v3p0__5_2_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8279,12 +8619,9 @@ define void @s_shuffle_v3p0_v3p0__5_3_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8297,18 +8634,43 @@ define void @s_shuffle_v3p0_v3p0__5_3_4() {
}
define void @s_shuffle_v3p0_v3p0__u_5_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__u_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__u_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__u_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__u_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 poison, i32 5, i32 5>
@@ -8362,10 +8724,8 @@ define void @s_shuffle_v3p0_v3p0__0_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8423,10 +8783,8 @@ define void @s_shuffle_v3p0_v3p0__1_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8484,10 +8842,8 @@ define void @s_shuffle_v3p0_v3p0__2_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8500,18 +8856,43 @@ define void @s_shuffle_v3p0_v3p0__2_5_5() {
}
define void @s_shuffle_v3p0_v3p0__3_5_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__3_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__3_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__3_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__3_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 3, i32 5, i32 5>
@@ -8520,20 +8901,48 @@ define void @s_shuffle_v3p0_v3p0__3_5_5() {
}
define void @s_shuffle_v3p0_v3p0__4_5_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__4_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__4_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__4_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__4_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 4, i32 5, i32 5>
@@ -8542,18 +8951,43 @@ define void @s_shuffle_v3p0_v3p0__4_5_5() {
}
define void @s_shuffle_v3p0_v3p0__5_u_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_u_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_u_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_u_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_u_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 poison, i32 5>
@@ -8607,10 +9041,8 @@ define void @s_shuffle_v3p0_v3p0__5_0_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8668,10 +9100,8 @@ define void @s_shuffle_v3p0_v3p0__5_1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8729,10 +9159,8 @@ define void @s_shuffle_v3p0_v3p0__5_2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8785,12 +9213,9 @@ define void @s_shuffle_v3p0_v3p0__5_3_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8803,18 +9228,43 @@ define void @s_shuffle_v3p0_v3p0__5_3_5() {
}
define void @s_shuffle_v3p0_v3p0__5_4_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_4_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_4_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_4_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 5>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll
index b03066e66cf66..707633944e851 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll
@@ -8016,8 +8016,7 @@ define void @s_shuffle_v3p0_v4p0__1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8057,8 +8056,7 @@ define void @s_shuffle_v3p0_v4p0__2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8102,8 +8100,7 @@ define void @s_shuffle_v3p0_v4p0__3_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8161,8 +8158,7 @@ define void @s_shuffle_v3p0_v4p0__5_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8203,8 +8199,7 @@ define void @s_shuffle_v3p0_v4p0__6_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8249,8 +8244,7 @@ define void @s_shuffle_v3p0_v4p0__7_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8309,10 +8303,8 @@ define void @s_shuffle_v3p0_v4p0__7_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8366,8 +8358,7 @@ define void @s_shuffle_v3p0_v4p0__7_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8425,10 +8416,8 @@ define void @s_shuffle_v3p0_v4p0__7_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8482,10 +8471,8 @@ define void @s_shuffle_v3p0_v4p0__7_3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8534,10 +8521,8 @@ define void @s_shuffle_v3p0_v4p0__7_4_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8550,18 +8535,43 @@ define void @s_shuffle_v3p0_v4p0__7_4_u() {
}
define void @s_shuffle_v3p0_v4p0__7_5_u() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_5_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 poison>
@@ -8606,10 +8616,8 @@ define void @s_shuffle_v3p0_v4p0__7_6_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8654,10 +8662,8 @@ define void @s_shuffle_v3p0_v4p0__7_7_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8716,13 +8722,11 @@ define void @s_shuffle_v3p0_v4p0__7_7_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8781,13 +8785,11 @@ define void @s_shuffle_v3p0_v4p0__7_7_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8841,10 +8843,8 @@ define void @s_shuffle_v3p0_v4p0__7_7_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8902,12 +8902,9 @@ define void @s_shuffle_v3p0_v4p0__7_7_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8956,12 +8953,9 @@ define void @s_shuffle_v3p0_v4p0__7_7_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9010,12 +9004,9 @@ define void @s_shuffle_v3p0_v4p0__7_7_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9028,20 +9019,48 @@ define void @s_shuffle_v3p0_v4p0__7_7_5() {
}
define void @s_shuffle_v3p0_v4p0__7_7_6() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_7_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_7_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_7_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_7_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 7, i32 6>
@@ -9090,12 +9109,9 @@ define void @s_shuffle_v3p0_v4p0__7_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9144,10 +9160,8 @@ define void @s_shuffle_v3p0_v4p0__u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9159,20 +9173,48 @@ define void @s_shuffle_v3p0_v4p0__u_0_0() {
}
define void @s_shuffle_v3p0_v4p0__0_0_0() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -9220,12 +9262,9 @@ define void @s_shuffle_v3p0_v4p0__1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9273,12 +9312,9 @@ define void @s_shuffle_v3p0_v4p0__2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9330,12 +9366,9 @@ define void @s_shuffle_v3p0_v4p0__3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9383,10 +9416,8 @@ define void @s_shuffle_v3p0_v4p0__4_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9444,15 +9475,12 @@ define void @s_shuffle_v3p0_v4p0__5_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9511,13 +9539,11 @@ define void @s_shuffle_v3p0_v4p0__6_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9576,15 +9602,12 @@ define void @s_shuffle_v3p0_v4p0__7_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9639,13 +9662,11 @@ define void @s_shuffle_v3p0_v4p0__7_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9704,15 +9725,12 @@ define void @s_shuffle_v3p0_v4p0__7_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9770,12 +9788,9 @@ define void @s_shuffle_v3p0_v4p0__7_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9833,12 +9848,9 @@ define void @s_shuffle_v3p0_v4p0__7_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9897,15 +9909,12 @@ define void @s_shuffle_v3p0_v4p0__7_4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9963,10 +9972,8 @@ define void @s_shuffle_v3p0_v4p0__7_5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -10028,12 +10035,9 @@ define void @s_shuffle_v3p0_v4p0__7_6_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -10046,137 +10050,12 @@ define void @s_shuffle_v3p0_v4p0__7_6_0() {
}
define void @s_shuffle_v3p0_v4p0__u_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__0_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__1_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__2_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__2_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__3_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__3_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__4_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__5_1_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_1_1:
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -10184,17 +10063,12 @@ define void @s_shuffle_v3p0_v4p0__5_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_1_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -10202,42 +10076,30 @@ define void @s_shuffle_v3p0_v4p0__5_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_1_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__6_1_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_1_1:
+define void @s_shuffle_v3p0_v4p0__0_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -10245,17 +10107,12 @@ define void @s_shuffle_v3p0_v4p0__6_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_1_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -10263,42 +10120,32 @@ define void @s_shuffle_v3p0_v4p0__6_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_1_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_1_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_1:
+define void @s_shuffle_v3p0_v4p0__1_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -10306,17 +10153,14 @@ define void @s_shuffle_v3p0_v4p0__7_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -10324,200 +10168,204 @@ define void @s_shuffle_v3p0_v4p0__7_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_u_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_1:
+define void @s_shuffle_v3p0_v4p0__2_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_0_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_1:
+define void @s_shuffle_v3p0_v4p0__3_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__4_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_2_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_1:
+define void @s_shuffle_v3p0_v4p0__5_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -10526,61 +10374,57 @@ define void @s_shuffle_v3p0_v4p0__7_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_3_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_1:
+define void @s_shuffle_v3p0_v4p0__6_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -10589,92 +10433,80 @@ define void @s_shuffle_v3p0_v4p0__7_3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_4_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_1:
+define void @s_shuffle_v3p0_v4p0__7_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_5_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_1:
+define void @s_shuffle_v3p0_v4p0__7_u_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -10692,7 +10524,7 @@ define void @s_shuffle_v3p0_v4p0__7_5_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -10710,32 +10542,30 @@ define void @s_shuffle_v3p0_v4p0__7_5_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_6_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_1:
+define void @s_shuffle_v3p0_v4p0__7_0_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -10746,8 +10576,8 @@ define void @s_shuffle_v3p0_v4p0__7_6_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -10755,7 +10585,7 @@ define void @s_shuffle_v3p0_v4p0__7_6_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -10766,8 +10596,8 @@ define void @s_shuffle_v3p0_v4p0__7_6_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -10775,190 +10605,131 @@ define void @s_shuffle_v3p0_v4p0__7_6_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__u_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__0_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__1_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__1_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__2_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__3_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__3_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__4_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v3p0_v4p0__7_2_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 2, i32 2>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__5_2_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_2_2:
+define void @s_shuffle_v3p0_v4p0__7_3_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_2_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_2_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -10967,295 +10738,1418 @@ define void @s_shuffle_v3p0_v4p0__5_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__6_2_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_2_2:
+define void @s_shuffle_v3p0_v4p0__7_4_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_2_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_2_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_2_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_2:
+define void @s_shuffle_v3p0_v4p0__7_5_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_6_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__u_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__0_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__1_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__2_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__3_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__4_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__5_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__6_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_u_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_0_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_1_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_3_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_4_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_5_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_6_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__u_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__0_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__1_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__2_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_u_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_2:
+define void @s_shuffle_v3p0_v4p0__3_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_0_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_2:
+define void @s_shuffle_v3p0_v4p0__4_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_1_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_2:
+define void @s_shuffle_v3p0_v4p0__5_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11264,21 +12158,22 @@ define void @s_shuffle_v3p0_v4p0__7_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_3_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_2:
+define void @s_shuffle_v3p0_v4p0__6_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -11287,16 +12182,16 @@ define void @s_shuffle_v3p0_v4p0__7_3_2() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s14
; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -11305,16 +12200,16 @@ define void @s_shuffle_v3p0_v4p0__7_3_2() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s14
; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11323,23 +12218,22 @@ define void @s_shuffle_v3p0_v4p0__7_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_4_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_2:
+define void @s_shuffle_v3p0_v4p0__7_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -11350,14 +12244,16 @@ define void @s_shuffle_v3p0_v4p0__7_4_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -11368,14 +12264,16 @@ define void @s_shuffle_v3p0_v4p0__7_4_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11384,59 +12282,58 @@ define void @s_shuffle_v3p0_v4p0__7_4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_5_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_2:
+define void @s_shuffle_v3p0_v4p0__7_u_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11445,59 +12342,61 @@ define void @s_shuffle_v3p0_v4p0__7_5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_6_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_2:
+define void @s_shuffle_v3p0_v4p0__7_0_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11506,30 +12405,32 @@ define void @s_shuffle_v3p0_v4p0__7_6_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__u_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_3_3:
+define void @s_shuffle_v3p0_v4p0__7_1_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -11537,14 +12438,17 @@ define void @s_shuffle_v3p0_v4p0__u_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -11552,58 +12456,42 @@ define void @s_shuffle_v3p0_v4p0__u_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__0_3_3() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 3, i32 3>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__1_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_3_3:
+define void @s_shuffle_v3p0_v4p0__7_2_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -11611,16 +12499,19 @@ define void @s_shuffle_v3p0_v4p0__1_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -11628,39 +12519,43 @@ define void @s_shuffle_v3p0_v4p0__1_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 3, i32 3>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__2_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_3_3:
+define void @s_shuffle_v3p0_v4p0__7_4_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -11668,16 +12563,19 @@ define void @s_shuffle_v3p0_v4p0__2_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -11685,94 +12583,102 @@ define void @s_shuffle_v3p0_v4p0__2_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 3, i32 3>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__3_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_3_3:
+define void @s_shuffle_v3p0_v4p0__7_5_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 3, i32 3>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__4_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_3_3:
+define void @s_shuffle_v3p0_v4p0__7_6_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -11780,14 +12686,19 @@ define void @s_shuffle_v3p0_v4p0__4_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -11795,649 +12706,531 @@ define void @s_shuffle_v3p0_v4p0__4_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 3, i32 3>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__5_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_3_3:
+define void @s_shuffle_v3p0_v4p0__u_4_4() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__0_4_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__6_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_3_3:
+define void @s_shuffle_v3p0_v4p0__1_4_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_3:
+define void @s_shuffle_v3p0_v4p0__2_4_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_u_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_3:
+define void @s_shuffle_v3p0_v4p0__3_4_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_0_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_3:
+define void @s_shuffle_v3p0_v4p0__4_4_4() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__5_4_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_1_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_3:
+define void @s_shuffle_v3p0_v4p0__6_4_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_2_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_3:
+define void @s_shuffle_v3p0_v4p0__7_4_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_4_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_3:
+define void @s_shuffle_v3p0_v4p0__7_u_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_5_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_3:
+define void @s_shuffle_v3p0_v4p0__7_0_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_6_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_3:
+define void @s_shuffle_v3p0_v4p0__7_1_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -12446,831 +13239,783 @@ define void @s_shuffle_v3p0_v4p0__7_6_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__u_4_4() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__0_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_4_4:
+define void @s_shuffle_v3p0_v4p0__7_2_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__1_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_4_4:
+define void @s_shuffle_v3p0_v4p0__7_3_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__2_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_4_4:
+define void @s_shuffle_v3p0_v4p0__7_5_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__3_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_4_4:
+define void @s_shuffle_v3p0_v4p0__7_6_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 4, i32 4>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__4_4_4() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__5_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_4_4:
+define void @s_shuffle_v3p0_v4p0__u_5_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 poison, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__6_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_4_4:
+define void @s_shuffle_v3p0_v4p0__0_5_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 0, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_4:
+define void @s_shuffle_v3p0_v4p0__1_5_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 1, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_u_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_4:
+define void @s_shuffle_v3p0_v4p0__2_5_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 2, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_0_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_4:
+define void @s_shuffle_v3p0_v4p0__3_5_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 3, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_1_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_4:
+define void @s_shuffle_v3p0_v4p0__4_5_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 4, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_2_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_4:
+define void @s_shuffle_v3p0_v4p0__5_5_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_3_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_4:
+define void @s_shuffle_v3p0_v4p0__6_5_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_5_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_4:
+define void @s_shuffle_v3p0_v4p0__7_5_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_6_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_4:
+define void @s_shuffle_v3p0_v4p0__7_u_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 4>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__u_5_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 poison, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__0_5_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_5_5:
+define void @s_shuffle_v3p0_v4p0__7_0_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -13278,17 +14023,19 @@ define void @s_shuffle_v3p0_v4p0__0_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_5_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -13296,68 +14043,68 @@ define void @s_shuffle_v3p0_v4p0__0_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_5_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 0, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__1_5_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_5_5:
+define void @s_shuffle_v3p0_v4p0__7_1_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_5_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_5_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -13366,231 +14113,145 @@ define void @s_shuffle_v3p0_v4p0__1_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 1, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__2_5_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_5_5:
+define void @s_shuffle_v3p0_v4p0__7_2_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_5_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_5_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 2, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__3_5_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_5_5:
+define void @s_shuffle_v3p0_v4p0__7_3_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_5_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_5_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 3, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__4_5_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 4, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__5_5_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__6_5_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__6_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__7_5_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_u_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_5:
+define void @s_shuffle_v3p0_v4p0__7_4_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -13598,6 +14259,8 @@ define void @s_shuffle_v3p0_v4p0__7_u_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -13605,7 +14268,7 @@ define void @s_shuffle_v3p0_v4p0__7_u_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -13613,6 +14276,8 @@ define void @s_shuffle_v3p0_v4p0__7_u_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -13620,41 +14285,37 @@ define void @s_shuffle_v3p0_v4p0__7_u_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_0_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_5:
+define void @s_shuffle_v3p0_v4p0__7_6_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -13662,19 +14323,16 @@ define void @s_shuffle_v3p0_v4p0__7_0_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -13682,35 +14340,73 @@ define void @s_shuffle_v3p0_v4p0__7_0_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__u_6_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 poison, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_1_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_5:
+define void @s_shuffle_v3p0_v4p0__0_6_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -13719,16 +14415,16 @@ define void @s_shuffle_v3p0_v4p0__7_1_5() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -13737,16 +14433,16 @@ define void @s_shuffle_v3p0_v4p0__7_1_5() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -13755,360 +14451,300 @@ define void @s_shuffle_v3p0_v4p0__7_1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 0, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_2_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_5:
+define void @s_shuffle_v3p0_v4p0__1_6_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 1, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_3_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_5:
+define void @s_shuffle_v3p0_v4p0__2_6_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 2, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_4_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_5:
+define void @s_shuffle_v3p0_v4p0__3_6_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 3, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_6_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_5:
+define void @s_shuffle_v3p0_v4p0__4_6_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__u_6_6() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 poison, i32 6, i32 6>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 4, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__0_6_6() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_6_6:
+define void @s_shuffle_v3p0_v4p0__5_6_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_6_6:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_6_6:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 0, i32 6, i32 6>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__1_6_6() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_6_6:
+define void @s_shuffle_v3p0_v4p0__6_6_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -14116,17 +14752,14 @@ define void @s_shuffle_v3p0_v4p0__1_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_6_6:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -14134,42 +14767,34 @@ define void @s_shuffle_v3p0_v4p0__1_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_6_6:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 1, i32 6, i32 6>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__2_6_6() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_6_6:
+define void @s_shuffle_v3p0_v4p0__7_6_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_6:
; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -14177,17 +14802,14 @@ define void @s_shuffle_v3p0_v4p0__2_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_6_6:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -14195,190 +14817,63 @@ define void @s_shuffle_v3p0_v4p0__2_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_6_6:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 2, i32 6, i32 6>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__3_6_6() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_6_6:
+define void @s_shuffle_v3p0_v4p0__7_u_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_6_6:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_6_6:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 3, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__4_6_6() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 4, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__5_6_6() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__5_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__6_6_6() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__7_6_6() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__7_u_6() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_u_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 6>
@@ -14432,10 +14927,8 @@ define void @s_shuffle_v3p0_v4p0__7_0_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14493,10 +14986,8 @@ define void @s_shuffle_v3p0_v4p0__7_1_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14554,10 +15045,8 @@ define void @s_shuffle_v3p0_v4p0__7_2_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14615,10 +15104,8 @@ define void @s_shuffle_v3p0_v4p0__7_3_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14671,12 +15158,9 @@ define void @s_shuffle_v3p0_v4p0__7_4_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14689,18 +15173,43 @@ define void @s_shuffle_v3p0_v4p0__7_4_6() {
}
define void @s_shuffle_v3p0_v4p0__7_5_6() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_5_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 6>
@@ -14745,10 +15254,8 @@ define void @s_shuffle_v3p0_v4p0__u_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14806,10 +15313,8 @@ define void @s_shuffle_v3p0_v4p0__0_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14871,12 +15376,9 @@ define void @s_shuffle_v3p0_v4p0__1_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14934,12 +15436,9 @@ define void @s_shuffle_v3p0_v4p0__2_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15001,12 +15500,9 @@ define void @s_shuffle_v3p0_v4p0__3_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15019,20 +15515,48 @@ define void @s_shuffle_v3p0_v4p0__3_7_7() {
}
define void @s_shuffle_v3p0_v4p0__4_7_7() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 4, i32 7, i32 7>
@@ -15081,12 +15605,9 @@ define void @s_shuffle_v3p0_v4p0__5_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15139,12 +15660,9 @@ define void @s_shuffle_v3p0_v4p0__6_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15193,10 +15711,8 @@ define void @s_shuffle_v3p0_v4p0__7_u_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15258,12 +15774,9 @@ define void @s_shuffle_v3p0_v4p0__7_0_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15321,10 +15834,8 @@ define void @s_shuffle_v3p0_v4p0__7_1_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15386,12 +15897,9 @@ define void @s_shuffle_v3p0_v4p0__7_2_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15449,12 +15957,9 @@ define void @s_shuffle_v3p0_v4p0__7_3_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15507,12 +16012,9 @@ define void @s_shuffle_v3p0_v4p0__7_4_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15525,20 +16027,48 @@ define void @s_shuffle_v3p0_v4p0__7_4_7() {
}
define void @s_shuffle_v3p0_v4p0__7_5_7() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_5_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 7>
@@ -15587,12 +16117,9 @@ define void @s_shuffle_v3p0_v4p0__7_6_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll
index ac7d9557ce765..3dc06c075b039 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll
@@ -3734,8 +3734,7 @@ define void @s_shuffle_v4i64_v2i64__1_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3793,8 +3792,7 @@ define void @s_shuffle_v4i64_v2i64__3_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3852,10 +3850,8 @@ define void @s_shuffle_v4i64_v2i64__3_0_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3909,8 +3905,7 @@ define void @s_shuffle_v4i64_v2i64__3_1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3959,10 +3954,8 @@ define void @s_shuffle_v4i64_v2i64__3_2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3975,18 +3968,43 @@ define void @s_shuffle_v4i64_v2i64__3_2_u_u() {
}
define void @s_shuffle_v4i64_v2i64__3_3_u_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_u_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_u_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_u_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_u_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 poison>
@@ -3995,21 +4013,52 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_u() {
}
define void @s_shuffle_v4i64_v2i64__3_3_0_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_0_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_0_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_0_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_0_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 poison>
@@ -4063,10 +4112,8 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4079,20 +4126,48 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_u() {
}
define void @s_shuffle_v4i64_v2i64__3_3_2_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_2_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_2_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_2_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_2_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 poison>
@@ -4101,20 +4176,48 @@ define void @s_shuffle_v4i64_v2i64__3_3_2_u() {
}
define void @s_shuffle_v4i64_v2i64__3_3_3_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_3_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_3_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_3_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_3_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 poison>
@@ -4172,12 +4275,9 @@ define void @s_shuffle_v4i64_v2i64__3_3_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4190,23 +4290,57 @@ define void @s_shuffle_v4i64_v2i64__3_3_3_0() {
}
define void @s_shuffle_v4i64_v2i64__3_3_3_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_3_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_3_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_3_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_3_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 1>
@@ -4259,14 +4393,10 @@ define void @s_shuffle_v4i64_v2i64__3_3_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4279,44 +4409,103 @@ define void @s_shuffle_v4i64_v2i64__3_3_3_2() {
}
define void @s_shuffle_v4i64_v2i64__3_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x i64> asm "; def $0", "=s"()
- %vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v2i64__u_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__u_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x i64> asm "; def $0", "=s"()
+ %vec1 = call <2 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v2i64__u_0_0_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__u_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__u_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__u_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -4324,22 +4513,53 @@ define void @s_shuffle_v4i64_v2i64__u_0_0_0() {
}
define void @s_shuffle_v4i64_v2i64__0_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__0_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: s_mov_b32 s14, s8
-; GFX9-NEXT: s_mov_b32 s15, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__0_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__0_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__0_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -4347,22 +4567,53 @@ define void @s_shuffle_v4i64_v2i64__0_0_0_0() {
}
define void @s_shuffle_v4i64_v2i64__1_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__1_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__1_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__1_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__1_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -4370,20 +4621,48 @@ define void @s_shuffle_v4i64_v2i64__1_0_0_0() {
}
define void @s_shuffle_v4i64_v2i64__2_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__2_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__2_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__2_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__2_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -4440,12 +4719,9 @@ define void @s_shuffle_v4i64_v2i64__3_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4503,10 +4779,8 @@ define void @s_shuffle_v4i64_v2i64__3_u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4568,12 +4842,9 @@ define void @s_shuffle_v4i64_v2i64__3_1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4635,12 +4906,9 @@ define void @s_shuffle_v4i64_v2i64__3_2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4653,57 +4921,91 @@ define void @s_shuffle_v4i64_v2i64__3_2_0_0() {
}
define void @s_shuffle_v4i64_v2i64__3_3_0_0() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x i64> asm "; def $0", "=s"()
- %vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v2i64__3_3_u_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_u_0:
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_u_0:
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x i64> asm "; def $0", "=s"()
+ %vec1 = call <2 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v2i64__3_3_u_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_u_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_u_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
@@ -4723,10 +5025,8 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4788,12 +5088,9 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4855,12 +5152,9 @@ define void @s_shuffle_v4i64_v2i64__3_3_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4873,20 +5167,48 @@ define void @s_shuffle_v4i64_v2i64__3_3_2_0() {
}
define void @s_shuffle_v4i64_v2i64__u_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__u_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__u_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__u_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__u_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -4894,20 +5216,48 @@ define void @s_shuffle_v4i64_v2i64__u_1_1_1() {
}
define void @s_shuffle_v4i64_v2i64__0_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__0_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__0_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__0_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__0_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -4915,22 +5265,53 @@ define void @s_shuffle_v4i64_v2i64__0_1_1_1() {
}
define void @s_shuffle_v4i64_v2i64__1_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__1_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__1_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__1_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__1_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -4938,20 +5319,48 @@ define void @s_shuffle_v4i64_v2i64__1_1_1_1() {
}
define void @s_shuffle_v4i64_v2i64__2_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__2_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__2_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__2_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__2_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -5008,12 +5417,9 @@ define void @s_shuffle_v4i64_v2i64__3_1_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5071,10 +5477,8 @@ define void @s_shuffle_v4i64_v2i64__3_u_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5136,12 +5540,9 @@ define void @s_shuffle_v4i64_v2i64__3_0_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5203,12 +5604,9 @@ define void @s_shuffle_v4i64_v2i64__3_2_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5221,46 +5619,111 @@ define void @s_shuffle_v4i64_v2i64__3_2_1_1() {
}
define void @s_shuffle_v4i64_v2i64__3_3_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x i64> asm "; def $0", "=s"()
- %vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x i64> asm "; def $0", "=s"()
+ %vec1 = call <2 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
define void @s_shuffle_v4i64_v2i64__3_3_u_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_u_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_u_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_u_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_u_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 1>
@@ -5269,21 +5732,52 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_1() {
}
define void @s_shuffle_v4i64_v2i64__3_3_0_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_0_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_0_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_0_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_0_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 1>
@@ -5341,12 +5835,9 @@ define void @s_shuffle_v4i64_v2i64__3_3_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5445,8 +5936,7 @@ define void @s_shuffle_v4i64_v2i64__1_2_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5472,22 +5962,53 @@ define void @s_shuffle_v4i64_v2i64__2_2_2_2() {
}
define void @s_shuffle_v4i64_v2i64__3_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
@@ -5496,20 +6017,48 @@ define void @s_shuffle_v4i64_v2i64__3_2_2_2() {
}
define void @s_shuffle_v4i64_v2i64__3_u_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_u_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_u_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_u_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_u_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 poison, i32 2, i32 2>
@@ -5567,12 +6116,9 @@ define void @s_shuffle_v4i64_v2i64__3_0_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5585,23 +6131,57 @@ define void @s_shuffle_v4i64_v2i64__3_0_2_2() {
}
define void @s_shuffle_v4i64_v2i64__3_1_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_1_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_1_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_1_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_1_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 1, i32 2, i32 2>
@@ -5610,22 +6190,53 @@ define void @s_shuffle_v4i64_v2i64__3_1_2_2() {
}
define void @s_shuffle_v4i64_v2i64__3_3_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 2>
@@ -5674,12 +6285,9 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5741,12 +6349,9 @@ define void @s_shuffle_v4i64_v2i64__3_3_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5812,14 +6417,10 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5832,20 +6433,48 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_2() {
}
define void @s_shuffle_v4i64_v2i64__u_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__u_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__u_3_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__u_3_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__u_3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
@@ -5854,23 +6483,57 @@ define void @s_shuffle_v4i64_v2i64__u_3_3_3() {
}
define void @s_shuffle_v4i64_v2i64__0_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__0_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__0_3_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__0_3_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__0_3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
@@ -5928,12 +6591,9 @@ define void @s_shuffle_v4i64_v2i64__1_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5946,78 +6606,134 @@ define void @s_shuffle_v4i64_v2i64__1_3_3_3() {
}
define void @s_shuffle_v4i64_v2i64__2_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__2_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x i64> asm "; def $0", "=s"()
- %vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v2i64__3_u_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_u_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x i64> asm "; def $0", "=s"()
- %vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 poison, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v2i64__3_0_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_0_3_3:
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_0_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__2_3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x i64> asm "; def $0", "=s"()
+ %vec1 = call <2 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v2i64__3_u_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_u_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_u_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_u_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x i64> asm "; def $0", "=s"()
+ %vec1 = call <2 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 poison, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v2i64__3_0_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_0_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_0_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
@@ -6039,12 +6755,9 @@ define void @s_shuffle_v4i64_v2i64__3_0_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -6057,23 +6770,57 @@ define void @s_shuffle_v4i64_v2i64__3_0_3_3() {
}
define void @s_shuffle_v4i64_v2i64__3_1_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_1_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_1_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_1_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_1_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 1, i32 3, i32 3>
@@ -6082,22 +6829,53 @@ define void @s_shuffle_v4i64_v2i64__3_1_3_3() {
}
define void @s_shuffle_v4i64_v2i64__3_2_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_2_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_2_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_2_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_2_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 2, i32 3, i32 3>
@@ -6106,20 +6884,48 @@ define void @s_shuffle_v4i64_v2i64__3_2_3_3() {
}
define void @s_shuffle_v4i64_v2i64__3_3_u_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_u_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_u_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_u_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_u_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 3>
@@ -6128,23 +6934,57 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_3() {
}
define void @s_shuffle_v4i64_v2i64__3_3_0_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_0_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_0_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_0_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_0_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 3>
@@ -6202,12 +7042,9 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -6220,20 +7057,48 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_3() {
}
define void @s_shuffle_v4i64_v2i64__3_3_2_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_2_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_2_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_2_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_2_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 3>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll
index 8dd4a40d00680..1a295a4c6e8ed 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll
@@ -7818,8 +7818,7 @@ define void @s_shuffle_v4i64_v3i64__1_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -7859,8 +7858,7 @@ define void @s_shuffle_v4i64_v3i64__2_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -7918,8 +7916,7 @@ define void @s_shuffle_v4i64_v3i64__4_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -7960,8 +7957,7 @@ define void @s_shuffle_v4i64_v3i64__5_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8016,11 +8012,11 @@ define void @s_shuffle_v4i64_v3i64__5_0_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8070,8 +8066,7 @@ define void @s_shuffle_v4i64_v3i64__5_1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8125,10 +8120,8 @@ define void @s_shuffle_v4i64_v3i64__5_2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8173,10 +8166,8 @@ define void @s_shuffle_v4i64_v3i64__5_3_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8189,18 +8180,43 @@ define void @s_shuffle_v4i64_v3i64__5_3_u_u() {
}
define void @s_shuffle_v4i64_v3i64__5_4_u_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_u_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_u_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_u_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_4_u_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 poison, i32 poison>
@@ -8245,10 +8261,8 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8310,12 +8324,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8377,12 +8388,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8440,10 +8448,8 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8492,12 +8498,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8550,12 +8553,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8568,20 +8568,48 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_u() {
}
define void @s_shuffle_v4i64_v3i64__5_5_5_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_5_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_5_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 poison>
@@ -8639,12 +8667,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8706,12 +8731,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8773,12 +8795,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8835,14 +8854,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8895,14 +8910,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8915,22 +8926,53 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_4() {
}
define void @s_shuffle_v4i64_v3i64__5_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
@@ -8979,12 +9021,9 @@ define void @s_shuffle_v4i64_v3i64__u_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8996,22 +9035,53 @@ define void @s_shuffle_v4i64_v3i64__u_0_0_0() {
}
define void @s_shuffle_v4i64_v3i64__0_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: s_mov_b32 s14, s8
-; GFX9-NEXT: s_mov_b32 s15, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__0_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -9063,14 +9133,10 @@ define void @s_shuffle_v4i64_v3i64__1_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9122,14 +9188,10 @@ define void @s_shuffle_v4i64_v3i64__2_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9181,12 +9243,9 @@ define void @s_shuffle_v4i64_v3i64__3_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9248,17 +9307,13 @@ define void @s_shuffle_v4i64_v3i64__4_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9321,15 +9376,12 @@ define void @s_shuffle_v4i64_v3i64__5_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9388,13 +9440,11 @@ define void @s_shuffle_v4i64_v3i64__5_u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9457,15 +9507,12 @@ define void @s_shuffle_v4i64_v3i64__5_1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9527,14 +9574,10 @@ define void @s_shuffle_v4i64_v3i64__5_2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9597,15 +9640,12 @@ define void @s_shuffle_v4i64_v3i64__5_3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9667,12 +9707,9 @@ define void @s_shuffle_v4i64_v3i64__5_4_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9738,14 +9775,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9807,12 +9840,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9878,14 +9908,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9951,14 +9977,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10020,12 +10042,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10091,14 +10110,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10111,126 +10126,12 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_0() {
}
define void @s_shuffle_v4i64_v3i64__u_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__0_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__1_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__1_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__2_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__3_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__4_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1:
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__u_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: s_mov_b32 s14, s10
@@ -10240,13 +10141,273 @@ define void @s_shuffle_v4i64_v3i64__4_1_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__u_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__u_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__0_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__0_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__1_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__2_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__2_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__3_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__3_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__3_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__3_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__4_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s6
@@ -10269,12 +10430,9 @@ define void @s_shuffle_v4i64_v3i64__4_1_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10332,12 +10490,9 @@ define void @s_shuffle_v4i64_v3i64__5_1_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10396,13 +10551,11 @@ define void @s_shuffle_v4i64_v3i64__5_u_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10465,15 +10618,12 @@ define void @s_shuffle_v4i64_v3i64__5_0_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10531,14 +10681,10 @@ define void @s_shuffle_v4i64_v3i64__5_2_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10601,15 +10747,12 @@ define void @s_shuffle_v4i64_v3i64__5_3_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10671,12 +10814,9 @@ define void @s_shuffle_v4i64_v3i64__5_4_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10742,14 +10882,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10811,12 +10947,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10882,14 +11015,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10951,14 +11080,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11020,12 +11145,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11091,14 +11213,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11111,20 +11229,48 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_1() {
}
define void @s_shuffle_v4i64_v3i64__u_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__u_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__u_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__u_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -11132,20 +11278,48 @@ define void @s_shuffle_v4i64_v3i64__u_2_2_2() {
}
define void @s_shuffle_v4i64_v3i64__0_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__0_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -11153,22 +11327,53 @@ define void @s_shuffle_v4i64_v3i64__0_2_2_2() {
}
define void @s_shuffle_v4i64_v3i64__1_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__1_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -11176,22 +11381,53 @@ define void @s_shuffle_v4i64_v3i64__1_2_2_2() {
}
define void @s_shuffle_v4i64_v3i64__2_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__2_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -11199,20 +11435,48 @@ define void @s_shuffle_v4i64_v3i64__2_2_2_2() {
}
define void @s_shuffle_v4i64_v3i64__3_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__3_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__3_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__3_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -11269,12 +11533,9 @@ define void @s_shuffle_v4i64_v3i64__4_2_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11332,12 +11593,9 @@ define void @s_shuffle_v4i64_v3i64__5_2_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11391,10 +11649,8 @@ define void @s_shuffle_v4i64_v3i64__5_u_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11456,14 +11712,10 @@ define void @s_shuffle_v4i64_v3i64__5_0_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11517,10 +11769,8 @@ define void @s_shuffle_v4i64_v3i64__5_1_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11578,12 +11828,9 @@ define void @s_shuffle_v4i64_v3i64__5_3_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11645,12 +11892,9 @@ define void @s_shuffle_v4i64_v3i64__5_4_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11712,12 +11956,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11779,12 +12020,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11846,14 +12084,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11919,14 +12153,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11992,12 +12222,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12063,14 +12290,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12169,8 +12392,7 @@ define void @s_shuffle_v4i64_v3i64__1_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12210,8 +12432,7 @@ define void @s_shuffle_v4i64_v3i64__2_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12281,14 +12502,10 @@ define void @s_shuffle_v4i64_v3i64__4_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12341,14 +12558,10 @@ define void @s_shuffle_v4i64_v3i64__5_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12397,12 +12610,9 @@ define void @s_shuffle_v4i64_v3i64__5_u_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12465,15 +12675,12 @@ define void @s_shuffle_v4i64_v3i64__5_0_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12531,12 +12738,9 @@ define void @s_shuffle_v4i64_v3i64__5_1_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12598,14 +12802,10 @@ define void @s_shuffle_v4i64_v3i64__5_2_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:21]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s20
-; GFX942-NEXT: s_mov_b32 s9, s21
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s16
-; GFX942-NEXT: s_mov_b32 s13, s17
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12658,14 +12858,10 @@ define void @s_shuffle_v4i64_v3i64__5_4_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12722,14 +12918,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12782,12 +12974,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12853,14 +13042,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:21]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s20
-; GFX942-NEXT: s_mov_b32 s9, s21
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12926,14 +13111,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:21]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s20
-; GFX942-NEXT: s_mov_b32 s9, s21
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12995,12 +13176,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13057,14 +13235,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13077,20 +13251,48 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_3() {
}
define void @s_shuffle_v4i64_v3i64__u_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__u_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__u_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__u_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
@@ -13144,12 +13346,9 @@ define void @s_shuffle_v4i64_v3i64__0_4_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13211,12 +13410,9 @@ define void @s_shuffle_v4i64_v3i64__1_4_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13278,12 +13474,9 @@ define void @s_shuffle_v4i64_v3i64__2_4_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13296,20 +13489,48 @@ define void @s_shuffle_v4i64_v3i64__2_4_4_4() {
}
define void @s_shuffle_v4i64_v3i64__3_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__3_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__3_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__3_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
@@ -13318,22 +13539,53 @@ define void @s_shuffle_v4i64_v3i64__3_4_4_4() {
}
define void @s_shuffle_v4i64_v3i64__4_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__4_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__4_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
@@ -13342,22 +13594,53 @@ define void @s_shuffle_v4i64_v3i64__4_4_4_4() {
}
define void @s_shuffle_v4i64_v3i64__5_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
@@ -13402,12 +13685,9 @@ define void @s_shuffle_v4i64_v3i64__5_u_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13466,15 +13746,12 @@ define void @s_shuffle_v4i64_v3i64__5_0_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13532,12 +13809,9 @@ define void @s_shuffle_v4i64_v3i64__5_1_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13599,12 +13873,9 @@ define void @s_shuffle_v4i64_v3i64__5_2_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13657,14 +13928,10 @@ define void @s_shuffle_v4i64_v3i64__5_3_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13717,14 +13984,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13773,12 +14036,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13840,12 +14100,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13907,12 +14164,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13974,12 +14228,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14028,14 +14279,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14048,20 +14295,48 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_4() {
}
define void @s_shuffle_v4i64_v3i64__u_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__u_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__u_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__u_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
@@ -14119,12 +14394,9 @@ define void @s_shuffle_v4i64_v3i64__0_5_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14186,12 +14458,9 @@ define void @s_shuffle_v4i64_v3i64__1_5_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14253,12 +14522,9 @@ define void @s_shuffle_v4i64_v3i64__2_5_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14271,20 +14537,48 @@ define void @s_shuffle_v4i64_v3i64__2_5_5_5() {
}
define void @s_shuffle_v4i64_v3i64__3_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__3_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__3_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__3_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
@@ -14293,22 +14587,53 @@ define void @s_shuffle_v4i64_v3i64__3_5_5_5() {
}
define void @s_shuffle_v4i64_v3i64__4_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__4_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__4_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
@@ -14317,20 +14642,48 @@ define void @s_shuffle_v4i64_v3i64__4_5_5_5() {
}
define void @s_shuffle_v4i64_v3i64__5_u_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_u_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_u_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 5, i32 5>
@@ -14388,12 +14741,9 @@ define void @s_shuffle_v4i64_v3i64__5_0_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14455,12 +14805,9 @@ define void @s_shuffle_v4i64_v3i64__5_1_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14522,12 +14869,9 @@ define void @s_shuffle_v4i64_v3i64__5_2_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14584,14 +14928,10 @@ define void @s_shuffle_v4i64_v3i64__5_3_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14604,20 +14944,48 @@ define void @s_shuffle_v4i64_v3i64__5_3_5_5() {
}
define void @s_shuffle_v4i64_v3i64__5_4_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_4_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 5, i32 5>
@@ -14666,12 +15034,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14737,14 +15102,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14810,14 +15171,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14879,12 +15236,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14937,14 +15291,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15001,14 +15351,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll
index ea9ef2f1ac94a..d026e3b08b171 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll
@@ -13289,8 +13289,7 @@ define void @s_shuffle_v4i64_v4i64__1_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13330,8 +13329,7 @@ define void @s_shuffle_v4i64_v4i64__2_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13375,8 +13373,7 @@ define void @s_shuffle_v4i64_v4i64__3_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13434,8 +13431,7 @@ define void @s_shuffle_v4i64_v4i64__5_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13476,8 +13472,7 @@ define void @s_shuffle_v4i64_v4i64__6_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13522,8 +13517,7 @@ define void @s_shuffle_v4i64_v4i64__7_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13582,10 +13576,8 @@ define void @s_shuffle_v4i64_v4i64__7_0_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13639,8 +13631,7 @@ define void @s_shuffle_v4i64_v4i64__7_1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13698,10 +13689,8 @@ define void @s_shuffle_v4i64_v4i64__7_2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13755,10 +13744,8 @@ define void @s_shuffle_v4i64_v4i64__7_3_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13807,10 +13794,8 @@ define void @s_shuffle_v4i64_v4i64__7_4_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13823,18 +13808,43 @@ define void @s_shuffle_v4i64_v4i64__7_4_u_u() {
}
define void @s_shuffle_v4i64_v4i64__7_5_u_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_5_u_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_u_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_u_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_u_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 poison, i32 poison>
@@ -13879,10 +13889,8 @@ define void @s_shuffle_v4i64_v4i64__7_6_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13927,10 +13935,8 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13989,13 +13995,11 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14054,13 +14058,11 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14114,10 +14116,8 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14175,12 +14175,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14229,12 +14226,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14283,12 +14277,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14301,20 +14292,48 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_u() {
}
define void @s_shuffle_v4i64_v4i64__7_7_6_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_6_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 poison>
@@ -14363,12 +14382,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14434,14 +14450,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14507,14 +14519,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14580,14 +14588,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14649,12 +14653,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14711,14 +14712,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14771,14 +14768,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14835,14 +14828,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14855,22 +14844,53 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_6() {
}
define void @s_shuffle_v4i64_v4i64__7_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
@@ -14919,12 +14939,9 @@ define void @s_shuffle_v4i64_v4i64__u_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14936,22 +14953,53 @@ define void @s_shuffle_v4i64_v4i64__u_0_0_0() {
}
define void @s_shuffle_v4i64_v4i64__0_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: s_mov_b32 s14, s8
-; GFX9-NEXT: s_mov_b32 s15, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -15003,14 +15051,10 @@ define void @s_shuffle_v4i64_v4i64__1_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15062,14 +15106,10 @@ define void @s_shuffle_v4i64_v4i64__2_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15125,14 +15165,10 @@ define void @s_shuffle_v4i64_v4i64__3_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15184,12 +15220,9 @@ define void @s_shuffle_v4i64_v4i64__4_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15251,17 +15284,13 @@ define void @s_shuffle_v4i64_v4i64__5_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15324,15 +15353,12 @@ define void @s_shuffle_v4i64_v4i64__6_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15395,17 +15421,13 @@ define void @s_shuffle_v4i64_v4i64__7_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15464,15 +15486,12 @@ define void @s_shuffle_v4i64_v4i64__7_u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15535,17 +15554,13 @@ define void @s_shuffle_v4i64_v4i64__7_1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15611,14 +15626,10 @@ define void @s_shuffle_v4i64_v4i64__7_2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15680,14 +15691,10 @@ define void @s_shuffle_v4i64_v4i64__7_3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15750,17 +15757,13 @@ define void @s_shuffle_v4i64_v4i64__7_4_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15822,12 +15825,9 @@ define void @s_shuffle_v4i64_v4i64__7_5_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15893,14 +15893,10 @@ define void @s_shuffle_v4i64_v4i64__7_6_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15963,15 +15959,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16030,13 +16023,11 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16099,15 +16090,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16169,14 +16157,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16238,14 +16222,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16304,15 +16284,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16375,15 +16352,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16445,12 +16419,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16463,20 +16434,48 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_0() {
}
define void @s_shuffle_v4i64_v4i64__u_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__u_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__u_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -16484,89 +16483,210 @@ define void @s_shuffle_v4i64_v4i64__u_1_1_1() {
}
define void @s_shuffle_v4i64_v4i64__0_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__1_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__1_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__2_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__2_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__1_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__2_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
define void @s_shuffle_v4i64_v4i64__3_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__3_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -16574,20 +16694,48 @@ define void @s_shuffle_v4i64_v4i64__3_1_1_1() {
}
define void @s_shuffle_v4i64_v4i64__4_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__4_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__4_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -16644,12 +16792,9 @@ define void @s_shuffle_v4i64_v4i64__5_1_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16711,12 +16856,9 @@ define void @s_shuffle_v4i64_v4i64__6_1_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16778,12 +16920,9 @@ define void @s_shuffle_v4i64_v4i64__7_1_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16842,15 +16981,12 @@ define void @s_shuffle_v4i64_v4i64__7_u_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16913,17 +17049,13 @@ define void @s_shuffle_v4i64_v4i64__7_0_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16985,14 +17117,10 @@ define void @s_shuffle_v4i64_v4i64__7_2_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17054,14 +17182,10 @@ define void @s_shuffle_v4i64_v4i64__7_3_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17124,17 +17248,13 @@ define void @s_shuffle_v4i64_v4i64__7_4_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17196,12 +17316,9 @@ define void @s_shuffle_v4i64_v4i64__7_5_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17267,14 +17384,10 @@ define void @s_shuffle_v4i64_v4i64__7_6_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17337,15 +17450,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17404,13 +17514,11 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17473,15 +17581,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17539,14 +17644,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17604,14 +17705,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17670,15 +17767,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17741,15 +17835,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17811,12 +17902,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17829,149 +17917,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_1() {
}
define void @s_shuffle_v4i64_v4i64__u_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__0_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__1_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__1_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__2_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__2_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__3_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__3_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__4_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__5_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2:
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__u_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -17981,17 +17932,12 @@ define void @s_shuffle_v4i64_v4i64__5_2_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__u_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
@@ -18001,42 +17947,31 @@ define void @s_shuffle_v4i64_v4i64__5_2_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__6_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2:
+define void @s_shuffle_v4i64_v4i64__0_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -18046,15 +17981,12 @@ define void @s_shuffle_v4i64_v4i64__6_2_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
@@ -18064,42 +17996,31 @@ define void @s_shuffle_v4i64_v4i64__6_2_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2:
+define void @s_shuffle_v4i64_v4i64__1_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s12
@@ -18111,15 +18032,12 @@ define void @s_shuffle_v4i64_v4i64__7_2_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s12
@@ -18131,44 +18049,36 @@ define void @s_shuffle_v4i64_v4i64__7_2_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_u_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2:
+define void @s_shuffle_v4i64_v4i64__2_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18176,17 +18086,16 @@ define void @s_shuffle_v4i64_v4i64__7_u_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18194,115 +18103,88 @@ define void @s_shuffle_v4i64_v4i64__7_u_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_0_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2:
+define void @s_shuffle_v4i64_v4i64__3_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_1_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2:
+define void @s_shuffle_v4i64_v4i64__4_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__4_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18310,17 +18192,14 @@ define void @s_shuffle_v4i64_v4i64__7_1_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__4_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18328,32 +18207,26 @@ define void @s_shuffle_v4i64_v4i64__7_1_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_3_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2:
+define void @s_shuffle_v4i64_v4i64__5_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -18362,10 +18235,10 @@ define void @s_shuffle_v4i64_v4i64__7_3_2_2() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18373,7 +18246,7 @@ define void @s_shuffle_v4i64_v4i64__7_3_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -18382,10 +18255,10 @@ define void @s_shuffle_v4i64_v4i64__7_3_2_2() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18393,7 +18266,7 @@ define void @s_shuffle_v4i64_v4i64__7_3_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18402,25 +18275,22 @@ define void @s_shuffle_v4i64_v4i64__7_3_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_4_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2:
+define void @s_shuffle_v4i64_v4i64__6_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -18429,10 +18299,8 @@ define void @s_shuffle_v4i64_v4i64__7_4_2_2() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18440,7 +18308,7 @@ define void @s_shuffle_v4i64_v4i64__7_4_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -18449,10 +18317,8 @@ define void @s_shuffle_v4i64_v4i64__7_4_2_2() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18460,7 +18326,7 @@ define void @s_shuffle_v4i64_v4i64__7_4_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18469,65 +18335,62 @@ define void @s_shuffle_v4i64_v4i64__7_4_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_5_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2:
+define void @s_shuffle_v4i64_v4i64__7_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18536,37 +18399,32 @@ define void @s_shuffle_v4i64_v4i64__7_5_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_6_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2:
+define void @s_shuffle_v4i64_v4i64__7_u_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18574,19 +18432,17 @@ define void @s_shuffle_v4i64_v4i64__7_6_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18594,7 +18450,7 @@ define void @s_shuffle_v4i64_v4i64__7_6_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18603,61 +18459,65 @@ define void @s_shuffle_v4i64_v4i64__7_6_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2:
+define void @s_shuffle_v4i64_v4i64__7_0_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18666,35 +18526,33 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_u_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2:
+define void @s_shuffle_v4i64_v4i64__7_1_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18702,17 +18560,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18720,7 +18578,7 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18729,61 +18587,61 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_0_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2:
+define void @s_shuffle_v4i64_v4i64__7_3_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18792,67 +18650,62 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_1_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2:
+define void @s_shuffle_v4i64_v4i64__7_4_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18861,39 +18714,34 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_3_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2:
+define void @s_shuffle_v4i64_v4i64__7_5_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: s_mov_b32 s14, s16
; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
@@ -18901,19 +18749,19 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: s_mov_b32 s14, s16
; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
@@ -18921,7 +18769,7 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18930,203 +18778,182 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_4_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2:
+define void @s_shuffle_v4i64_v4i64__7_6_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_5_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2:
+define void @s_shuffle_v4i64_v4i64__7_7_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_6_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2:
+define void @s_shuffle_v4i64_v4i64__7_7_u_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19135,197 +18962,58 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__u_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__0_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__1_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__1_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__2_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__2_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__3_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__3_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__4_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__5_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3:
+define void @s_shuffle_v4i64_v4i64__7_7_0_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19334,61 +19022,63 @@ define void @s_shuffle_v4i64_v4i64__5_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__6_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3:
+define void @s_shuffle_v4i64_v4i64__7_7_1_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19397,65 +19087,63 @@ define void @s_shuffle_v4i64_v4i64__6_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3:
+define void @s_shuffle_v4i64_v4i64__7_7_3_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19464,86 +19152,87 @@ define void @s_shuffle_v4i64_v4i64__7_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_u_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3:
+define void @s_shuffle_v4i64_v4i64__7_7_4_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_0_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3:
+define void @s_shuffle_v4i64_v4i64__7_7_5_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -19554,18 +19243,16 @@ define void @s_shuffle_v4i64_v4i64__7_0_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -19576,83 +19263,81 @@ define void @s_shuffle_v4i64_v4i64__7_0_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_1_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3:
+define void @s_shuffle_v4i64_v4i64__7_7_6_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19661,35 +19346,29 @@ define void @s_shuffle_v4i64_v4i64__7_1_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_2_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3:
+define void @s_shuffle_v4i64_v4i64__u_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__u_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -19697,19 +19376,14 @@ define void @s_shuffle_v4i64_v4i64__7_2_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__u_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -19717,46 +19391,33 @@ define void @s_shuffle_v4i64_v4i64__7_2_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_4_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3:
+define void @s_shuffle_v4i64_v4i64__0_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -19764,19 +19425,14 @@ define void @s_shuffle_v4i64_v4i64__7_4_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -19784,113 +19440,89 @@ define void @s_shuffle_v4i64_v4i64__7_4_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_5_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3:
+define void @s_shuffle_v4i64_v4i64__1_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_6_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3:
+define void @s_shuffle_v4i64_v4i64__2_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -19898,19 +19530,16 @@ define void @s_shuffle_v4i64_v4i64__7_6_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -19918,44 +19547,36 @@ define void @s_shuffle_v4i64_v4i64__7_6_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3:
+define void @s_shuffle_v4i64_v4i64__3_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -19963,17 +19584,16 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -19981,127 +19601,116 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_u_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3:
+define void @s_shuffle_v4i64_v4i64__4_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__4_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__4_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_0_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3:
+define void @s_shuffle_v4i64_v4i64__5_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20110,67 +19719,58 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_1_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3:
+define void @s_shuffle_v4i64_v4i64__6_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20179,27 +19779,22 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_2_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3:
+define void @s_shuffle_v4i64_v4i64__7_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -20210,12 +19805,16 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -20226,12 +19825,16 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20240,23 +19843,22 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_4_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3:
+define void @s_shuffle_v4i64_v4i64__7_u_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -20267,14 +19869,14 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -20285,14 +19887,14 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20301,61 +19903,65 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_5_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3:
+define void @s_shuffle_v4i64_v4i64__7_0_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20364,65 +19970,59 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_6_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3:
+define void @s_shuffle_v4i64_v4i64__7_1_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20431,584 +20031,609 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__u_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__0_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4:
+define void @s_shuffle_v4i64_v4i64__7_2_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__1_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4:
+define void @s_shuffle_v4i64_v4i64__7_4_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4:
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__2_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4:
+define void @s_shuffle_v4i64_v4i64__7_5_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__3_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4:
+define void @s_shuffle_v4i64_v4i64__7_6_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__4_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__5_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4:
+define void @s_shuffle_v4i64_v4i64__7_7_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__6_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4:
+define void @s_shuffle_v4i64_v4i64__7_7_u_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4:
+define void @s_shuffle_v4i64_v4i64__7_7_0_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_u_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4:
+define void @s_shuffle_v4i64_v4i64__7_7_1_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_0_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4:
+define void @s_shuffle_v4i64_v4i64__7_7_2_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_1_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4:
+define void @s_shuffle_v4i64_v4i64__7_7_4_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -21017,25 +20642,22 @@ define void @s_shuffle_v4i64_v4i64__7_1_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_2_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4:
+define void @s_shuffle_v4i64_v4i64__7_7_5_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -21046,18 +20668,14 @@ define void @s_shuffle_v4i64_v4i64__7_2_4_4() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -21068,304 +20686,240 @@ define void @s_shuffle_v4i64_v4i64__7_2_4_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s16
-; GFX942-NEXT: s_mov_b32 s13, s17
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_3_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4:
+define void @s_shuffle_v4i64_v4i64__7_7_6_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s16
-; GFX942-NEXT: s_mov_b32 s13, s17
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_5_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4:
+define void @s_shuffle_v4i64_v4i64__u_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__0_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_6_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4:
+define void @s_shuffle_v4i64_v4i64__1_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4:
+define void @s_shuffle_v4i64_v4i64__2_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_u_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4:
+define void @s_shuffle_v4i64_v4i64__3_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -21373,14 +20927,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_4() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -21388,190 +20940,171 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_0_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4:
+define void @s_shuffle_v4i64_v4i64__4_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__5_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s4
; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s4
; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_1_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4:
+define void @s_shuffle_v4i64_v4i64__6_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_2_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4:
+define void @s_shuffle_v4i64_v4i64__7_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: s_mov_b32 s14, s4
; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -21579,17 +21112,18 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_4() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: s_mov_b32 s14, s4
; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -21597,46 +21131,38 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_3_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4:
+define void @s_shuffle_v4i64_v4i64__7_u_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: s_mov_b32 s14, s4
; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -21644,19 +21170,16 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_4() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: s_mov_b32 s14, s4
; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -21664,107 +21187,110 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:23]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s22
-; GFX942-NEXT: s_mov_b32 s11, s23
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_5_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4:
+define void @s_shuffle_v4i64_v4i64__7_0_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_6_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4:
+define void @s_shuffle_v4i64_v4i64__7_1_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s22
; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s20
-; GFX900-NEXT: s_mov_b32 s13, s21
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: s_mov_b32 s14, s16
; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
@@ -21772,18 +21298,19 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_4() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s22
; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s20
-; GFX90A-NEXT: s_mov_b32 s13, s21
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: s_mov_b32 s14, s16
; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
@@ -21791,65 +21318,1353 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_2_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_3_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_5_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_6_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_u_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_0_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_1_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_2_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_3_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_5_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_6_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s12, s20
+; GFX900-NEXT: s_mov_b32 s13, s21
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s12, s20
+; GFX90A-NEXT: s_mov_b32 s13, s21
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
define void @s_shuffle_v4i64_v4i64__u_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__u_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__u_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__0_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__1_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__2_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__3_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__4_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__4_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__4_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__5_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__6_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_u_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__0_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5:
+define void @s_shuffle_v4i64_v4i64__7_0_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -21857,17 +22672,19 @@ define void @s_shuffle_v4i64_v4i64__0_5_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -21875,74 +22692,69 @@ define void @s_shuffle_v4i64_v4i64__0_5_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__1_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5:
+define void @s_shuffle_v4i64_v4i64__7_1_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -21951,253 +22763,150 @@ define void @s_shuffle_v4i64_v4i64__1_5_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__2_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5:
+define void @s_shuffle_v4i64_v4i64__7_2_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__3_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5:
+define void @s_shuffle_v4i64_v4i64__7_3_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__4_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__5_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__5_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__6_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__6_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_u_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5:
+define void @s_shuffle_v4i64_v4i64__7_4_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -22205,6 +22914,8 @@ define void @s_shuffle_v4i64_v4i64__7_u_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
@@ -22214,7 +22925,7 @@ define void @s_shuffle_v4i64_v4i64__7_u_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -22222,6 +22933,8 @@ define void @s_shuffle_v4i64_v4i64__7_u_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
@@ -22231,43 +22944,38 @@ define void @s_shuffle_v4i64_v4i64__7_u_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_0_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5:
+define void @s_shuffle_v4i64_v4i64__7_6_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -22275,19 +22983,16 @@ define void @s_shuffle_v4i64_v4i64__7_0_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -22295,114 +23000,92 @@ define void @s_shuffle_v4i64_v4i64__7_0_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_1_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5:
+define void @s_shuffle_v4i64_v4i64__7_7_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_2_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5:
+define void @s_shuffle_v4i64_v4i64__7_7_u_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -22410,21 +23093,14 @@ define void @s_shuffle_v4i64_v4i64__7_2_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -22432,34 +23108,28 @@ define void @s_shuffle_v4i64_v4i64__7_2_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_3_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5:
+define void @s_shuffle_v4i64_v4i64__7_7_0_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -22470,14 +23140,16 @@ define void @s_shuffle_v4i64_v4i64__7_3_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -22488,174 +23160,176 @@ define void @s_shuffle_v4i64_v4i64__7_3_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_4_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5:
+define void @s_shuffle_v4i64_v4i64__7_7_1_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_6_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5:
+define void @s_shuffle_v4i64_v4i64__7_7_2_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5:
+define void @s_shuffle_v4i64_v4i64__7_7_3_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -22663,16 +23337,19 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -22680,33 +23357,31 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_u_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5:
+define void @s_shuffle_v4i64_v4i64__7_7_4_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -22714,6 +23389,8 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -22721,7 +23398,7 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -22729,6 +23406,8 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -22736,201 +23415,175 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_0_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5:
+define void @s_shuffle_v4i64_v4i64__7_7_6_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s10, s18
; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s10, s18
; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_1_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5:
+define void @s_shuffle_v4i64_v4i64__u_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__u_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__u_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 poison, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_2_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5:
+define void @s_shuffle_v4i64_v4i64__0_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -22939,311 +23592,271 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_3_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5:
+define void @s_shuffle_v4i64_v4i64__1_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_4_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5:
+define void @s_shuffle_v4i64_v4i64__2_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_6_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5:
+define void @s_shuffle_v4i64_v4i64__3_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__u_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 poison, i32 6, i32 6, i32 6>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__0_6_6_6() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6:
+define void @s_shuffle_v4i64_v4i64__4_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__4_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__4_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 4, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__1_6_6_6() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6:
+define void @s_shuffle_v4i64_v4i64__5_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -23253,17 +23866,14 @@ define void @s_shuffle_v4i64_v4i64__1_6_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
@@ -23273,44 +23883,35 @@ define void @s_shuffle_v4i64_v4i64__1_6_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__2_6_6_6() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6:
+define void @s_shuffle_v4i64_v4i64__6_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -23320,17 +23921,14 @@ define void @s_shuffle_v4i64_v4i64__2_6_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
@@ -23340,44 +23938,35 @@ define void @s_shuffle_v4i64_v4i64__2_6_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__3_6_6_6() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6:
+define void @s_shuffle_v4i64_v4i64__7_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -23387,17 +23976,14 @@ define void @s_shuffle_v4i64_v4i64__3_6_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
@@ -23407,119 +23993,19 @@ define void @s_shuffle_v4i64_v4i64__3_6_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__4_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 4, i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__5_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__5_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__6_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__6_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 6, i32 6>
@@ -23528,20 +24014,48 @@ define void @s_shuffle_v4i64_v4i64__7_6_6_6() {
}
define void @s_shuffle_v4i64_v4i64__7_u_6_6() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_u_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_6_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 6, i32 6>
@@ -23599,12 +24113,9 @@ define void @s_shuffle_v4i64_v4i64__7_0_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23666,12 +24177,9 @@ define void @s_shuffle_v4i64_v4i64__7_1_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23733,12 +24241,9 @@ define void @s_shuffle_v4i64_v4i64__7_2_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23800,12 +24305,9 @@ define void @s_shuffle_v4i64_v4i64__7_3_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23862,14 +24364,10 @@ define void @s_shuffle_v4i64_v4i64__7_4_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23882,20 +24380,48 @@ define void @s_shuffle_v4i64_v4i64__7_4_6_6() {
}
define void @s_shuffle_v4i64_v4i64__7_5_6_6() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_5_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_6_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 6, i32 6>
@@ -23904,22 +24430,53 @@ define void @s_shuffle_v4i64_v4i64__7_5_6_6() {
}
define void @s_shuffle_v4i64_v4i64__7_7_6_6() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 6>
@@ -23968,12 +24525,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24039,14 +24593,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24112,14 +24662,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24181,12 +24727,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24252,14 +24795,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24312,14 +24851,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24376,14 +24911,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24396,20 +24927,48 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_6() {
}
define void @s_shuffle_v4i64_v4i64__u_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__u_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__u_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 poison, i32 7, i32 7, i32 7>
@@ -24467,12 +25026,9 @@ define void @s_shuffle_v4i64_v4i64__0_7_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24534,12 +25090,9 @@ define void @s_shuffle_v4i64_v4i64__1_7_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24601,12 +25154,9 @@ define void @s_shuffle_v4i64_v4i64__2_7_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24659,117 +25209,232 @@ define void @s_shuffle_v4i64_v4i64__3_7_7_7() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_7_7_7:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__4_7_7_7() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__4_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__4_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 4, i32 7, i32 7, i32 7>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__5_7_7_7() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 7, i32 7, i32 7>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__6_7_7_7() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 7, i32 7, i32 7>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_u_7_7() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__4_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 4, i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__5_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__5_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__6_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__6_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_u_7_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_u_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 7, i32 7>
@@ -24827,12 +25492,9 @@ define void @s_shuffle_v4i64_v4i64__7_0_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24894,12 +25556,9 @@ define void @s_shuffle_v4i64_v4i64__7_1_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24961,12 +25620,9 @@ define void @s_shuffle_v4i64_v4i64__7_2_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25028,12 +25684,9 @@ define void @s_shuffle_v4i64_v4i64__7_3_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25090,14 +25743,10 @@ define void @s_shuffle_v4i64_v4i64__7_4_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25110,20 +25759,48 @@ define void @s_shuffle_v4i64_v4i64__7_4_7_7() {
}
define void @s_shuffle_v4i64_v4i64__7_5_7_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_5_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 7, i32 7>
@@ -25132,22 +25809,53 @@ define void @s_shuffle_v4i64_v4i64__7_5_7_7() {
}
define void @s_shuffle_v4i64_v4i64__7_6_7_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_6_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 7, i32 7>
@@ -25156,20 +25864,48 @@ define void @s_shuffle_v4i64_v4i64__7_6_7_7() {
}
define void @s_shuffle_v4i64_v4i64__7_7_u_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_u_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 7>
@@ -25227,12 +25963,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25294,12 +26027,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25361,12 +26091,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25428,12 +26155,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25486,14 +26210,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25550,14 +26270,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25570,20 +26286,48 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_7() {
}
define void @s_shuffle_v4i64_v4i64__7_7_6_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_6_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 7>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll
index b30af835a7882..14bacc2f74876 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll
@@ -3734,8 +3734,7 @@ define void @s_shuffle_v4p0_v2p0__1_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3793,8 +3792,7 @@ define void @s_shuffle_v4p0_v2p0__3_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3852,10 +3850,8 @@ define void @s_shuffle_v4p0_v2p0__3_0_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3909,8 +3905,7 @@ define void @s_shuffle_v4p0_v2p0__3_1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3959,10 +3954,8 @@ define void @s_shuffle_v4p0_v2p0__3_2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3975,18 +3968,43 @@ define void @s_shuffle_v4p0_v2p0__3_2_u_u() {
}
define void @s_shuffle_v4p0_v2p0__3_3_u_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_u_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_u_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_u_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_u_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 poison>
@@ -3995,21 +4013,52 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_u() {
}
define void @s_shuffle_v4p0_v2p0__3_3_0_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_0_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_0_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_0_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_0_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 poison>
@@ -4063,10 +4112,8 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4079,20 +4126,48 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_u() {
}
define void @s_shuffle_v4p0_v2p0__3_3_2_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_2_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_2_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_2_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_2_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 poison>
@@ -4101,20 +4176,48 @@ define void @s_shuffle_v4p0_v2p0__3_3_2_u() {
}
define void @s_shuffle_v4p0_v2p0__3_3_3_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_3_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_3_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_3_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_3_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 poison>
@@ -4172,12 +4275,9 @@ define void @s_shuffle_v4p0_v2p0__3_3_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4190,23 +4290,57 @@ define void @s_shuffle_v4p0_v2p0__3_3_3_0() {
}
define void @s_shuffle_v4p0_v2p0__3_3_3_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_3_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_3_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_3_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_3_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 1>
@@ -4259,14 +4393,10 @@ define void @s_shuffle_v4p0_v2p0__3_3_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4279,44 +4409,103 @@ define void @s_shuffle_v4p0_v2p0__3_3_3_2() {
}
define void @s_shuffle_v4p0_v2p0__3_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v2p0__u_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__u_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <2 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v2p0__u_0_0_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__u_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__u_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__u_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -4324,22 +4513,53 @@ define void @s_shuffle_v4p0_v2p0__u_0_0_0() {
}
define void @s_shuffle_v4p0_v2p0__0_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__0_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: s_mov_b32 s14, s8
-; GFX9-NEXT: s_mov_b32 s15, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__0_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__0_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__0_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -4347,22 +4567,53 @@ define void @s_shuffle_v4p0_v2p0__0_0_0_0() {
}
define void @s_shuffle_v4p0_v2p0__1_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__1_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__1_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__1_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__1_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -4370,20 +4621,48 @@ define void @s_shuffle_v4p0_v2p0__1_0_0_0() {
}
define void @s_shuffle_v4p0_v2p0__2_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__2_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__2_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__2_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__2_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -4440,12 +4719,9 @@ define void @s_shuffle_v4p0_v2p0__3_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4503,10 +4779,8 @@ define void @s_shuffle_v4p0_v2p0__3_u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4568,12 +4842,9 @@ define void @s_shuffle_v4p0_v2p0__3_1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4635,12 +4906,9 @@ define void @s_shuffle_v4p0_v2p0__3_2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4653,57 +4921,91 @@ define void @s_shuffle_v4p0_v2p0__3_2_0_0() {
}
define void @s_shuffle_v4p0_v2p0__3_3_0_0() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v2p0__3_3_u_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_u_0:
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_u_0:
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <2 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v2p0__3_3_u_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_u_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_u_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
@@ -4723,10 +5025,8 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4788,12 +5088,9 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4855,12 +5152,9 @@ define void @s_shuffle_v4p0_v2p0__3_3_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4873,20 +5167,48 @@ define void @s_shuffle_v4p0_v2p0__3_3_2_0() {
}
define void @s_shuffle_v4p0_v2p0__u_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__u_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__u_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__u_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__u_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -4894,20 +5216,48 @@ define void @s_shuffle_v4p0_v2p0__u_1_1_1() {
}
define void @s_shuffle_v4p0_v2p0__0_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__0_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__0_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__0_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__0_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -4915,22 +5265,53 @@ define void @s_shuffle_v4p0_v2p0__0_1_1_1() {
}
define void @s_shuffle_v4p0_v2p0__1_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__1_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__1_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__1_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__1_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -4938,20 +5319,48 @@ define void @s_shuffle_v4p0_v2p0__1_1_1_1() {
}
define void @s_shuffle_v4p0_v2p0__2_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__2_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__2_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__2_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__2_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -5008,12 +5417,9 @@ define void @s_shuffle_v4p0_v2p0__3_1_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5071,10 +5477,8 @@ define void @s_shuffle_v4p0_v2p0__3_u_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5136,12 +5540,9 @@ define void @s_shuffle_v4p0_v2p0__3_0_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5203,12 +5604,9 @@ define void @s_shuffle_v4p0_v2p0__3_2_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5221,46 +5619,111 @@ define void @s_shuffle_v4p0_v2p0__3_2_1_1() {
}
define void @s_shuffle_v4p0_v2p0__3_3_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <2 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
define void @s_shuffle_v4p0_v2p0__3_3_u_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_u_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_u_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_u_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_u_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 1>
@@ -5269,21 +5732,52 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_1() {
}
define void @s_shuffle_v4p0_v2p0__3_3_0_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_0_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_0_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_0_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_0_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 1>
@@ -5341,12 +5835,9 @@ define void @s_shuffle_v4p0_v2p0__3_3_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5445,8 +5936,7 @@ define void @s_shuffle_v4p0_v2p0__1_2_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5472,22 +5962,53 @@ define void @s_shuffle_v4p0_v2p0__2_2_2_2() {
}
define void @s_shuffle_v4p0_v2p0__3_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
@@ -5496,20 +6017,48 @@ define void @s_shuffle_v4p0_v2p0__3_2_2_2() {
}
define void @s_shuffle_v4p0_v2p0__3_u_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_u_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_u_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_u_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_u_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 poison, i32 2, i32 2>
@@ -5567,12 +6116,9 @@ define void @s_shuffle_v4p0_v2p0__3_0_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5585,23 +6131,57 @@ define void @s_shuffle_v4p0_v2p0__3_0_2_2() {
}
define void @s_shuffle_v4p0_v2p0__3_1_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_1_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_1_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_1_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_1_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 1, i32 2, i32 2>
@@ -5610,22 +6190,53 @@ define void @s_shuffle_v4p0_v2p0__3_1_2_2() {
}
define void @s_shuffle_v4p0_v2p0__3_3_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 2>
@@ -5674,12 +6285,9 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5741,12 +6349,9 @@ define void @s_shuffle_v4p0_v2p0__3_3_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5812,14 +6417,10 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5832,20 +6433,48 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_2() {
}
define void @s_shuffle_v4p0_v2p0__u_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__u_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__u_3_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__u_3_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__u_3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
@@ -5854,23 +6483,57 @@ define void @s_shuffle_v4p0_v2p0__u_3_3_3() {
}
define void @s_shuffle_v4p0_v2p0__0_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__0_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__0_3_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__0_3_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__0_3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
@@ -5928,12 +6591,9 @@ define void @s_shuffle_v4p0_v2p0__1_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5946,78 +6606,134 @@ define void @s_shuffle_v4p0_v2p0__1_3_3_3() {
}
define void @s_shuffle_v4p0_v2p0__2_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__2_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v2p0__3_u_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_u_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 poison, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v2p0__3_0_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_0_3_3:
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_0_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__2_3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <2 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v2p0__3_u_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_u_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_u_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_u_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <2 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 poison, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v2p0__3_0_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_0_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_0_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
@@ -6039,12 +6755,9 @@ define void @s_shuffle_v4p0_v2p0__3_0_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -6057,23 +6770,57 @@ define void @s_shuffle_v4p0_v2p0__3_0_3_3() {
}
define void @s_shuffle_v4p0_v2p0__3_1_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_1_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_1_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_1_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_1_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 1, i32 3, i32 3>
@@ -6082,22 +6829,53 @@ define void @s_shuffle_v4p0_v2p0__3_1_3_3() {
}
define void @s_shuffle_v4p0_v2p0__3_2_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_2_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_2_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_2_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_2_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 2, i32 3, i32 3>
@@ -6106,20 +6884,48 @@ define void @s_shuffle_v4p0_v2p0__3_2_3_3() {
}
define void @s_shuffle_v4p0_v2p0__3_3_u_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_u_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_u_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_u_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_u_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 3>
@@ -6128,23 +6934,57 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_3() {
}
define void @s_shuffle_v4p0_v2p0__3_3_0_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_0_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_0_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_0_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_0_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 3>
@@ -6202,12 +7042,9 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -6220,20 +7057,48 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_3() {
}
define void @s_shuffle_v4p0_v2p0__3_3_2_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_2_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_2_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_2_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_2_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 3>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll
index e6ac554735eee..0398418b82f3d 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll
@@ -7818,8 +7818,7 @@ define void @s_shuffle_v4p0_v3p0__1_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -7859,8 +7858,7 @@ define void @s_shuffle_v4p0_v3p0__2_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -7918,8 +7916,7 @@ define void @s_shuffle_v4p0_v3p0__4_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -7960,8 +7957,7 @@ define void @s_shuffle_v4p0_v3p0__5_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8016,11 +8012,11 @@ define void @s_shuffle_v4p0_v3p0__5_0_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8070,8 +8066,7 @@ define void @s_shuffle_v4p0_v3p0__5_1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8125,10 +8120,8 @@ define void @s_shuffle_v4p0_v3p0__5_2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8173,10 +8166,8 @@ define void @s_shuffle_v4p0_v3p0__5_3_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8189,18 +8180,43 @@ define void @s_shuffle_v4p0_v3p0__5_3_u_u() {
}
define void @s_shuffle_v4p0_v3p0__5_4_u_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_4_u_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_4_u_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_4_u_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_4_u_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 poison, i32 poison>
@@ -8245,10 +8261,8 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8310,12 +8324,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8377,12 +8388,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8440,10 +8448,8 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8492,12 +8498,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8550,12 +8553,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8568,20 +8568,48 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_u() {
}
define void @s_shuffle_v4p0_v3p0__5_5_5_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_5_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_5_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_5_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_5_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 poison>
@@ -8639,12 +8667,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8706,12 +8731,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8773,12 +8795,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8835,14 +8854,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8895,14 +8910,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8915,22 +8926,53 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_4() {
}
define void @s_shuffle_v4p0_v3p0__5_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
@@ -8979,12 +9021,9 @@ define void @s_shuffle_v4p0_v3p0__u_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8996,22 +9035,53 @@ define void @s_shuffle_v4p0_v3p0__u_0_0_0() {
}
define void @s_shuffle_v4p0_v3p0__0_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__0_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: s_mov_b32 s14, s8
-; GFX9-NEXT: s_mov_b32 s15, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__0_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__0_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__0_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -9063,14 +9133,10 @@ define void @s_shuffle_v4p0_v3p0__1_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9122,14 +9188,10 @@ define void @s_shuffle_v4p0_v3p0__2_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9181,12 +9243,9 @@ define void @s_shuffle_v4p0_v3p0__3_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9248,17 +9307,13 @@ define void @s_shuffle_v4p0_v3p0__4_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9321,15 +9376,12 @@ define void @s_shuffle_v4p0_v3p0__5_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9388,13 +9440,11 @@ define void @s_shuffle_v4p0_v3p0__5_u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9457,15 +9507,12 @@ define void @s_shuffle_v4p0_v3p0__5_1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9527,14 +9574,10 @@ define void @s_shuffle_v4p0_v3p0__5_2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9597,15 +9640,12 @@ define void @s_shuffle_v4p0_v3p0__5_3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9667,12 +9707,9 @@ define void @s_shuffle_v4p0_v3p0__5_4_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9738,14 +9775,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9807,12 +9840,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9878,14 +9908,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9951,14 +9977,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10020,12 +10042,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10091,14 +10110,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10111,126 +10126,12 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_0() {
}
define void @s_shuffle_v4p0_v3p0__u_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__0_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__0_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__1_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__1_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__2_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__2_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__3_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__4_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__4_1_1_1:
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__u_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: s_mov_b32 s14, s10
@@ -10240,13 +10141,273 @@ define void @s_shuffle_v4p0_v3p0__4_1_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__4_1_1_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__u_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__u_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__0_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__0_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__0_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__0_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__1_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__1_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__1_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__2_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__2_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__2_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__2_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__3_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__3_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__3_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__3_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__4_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__4_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__4_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s6
@@ -10269,12 +10430,9 @@ define void @s_shuffle_v4p0_v3p0__4_1_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10332,12 +10490,9 @@ define void @s_shuffle_v4p0_v3p0__5_1_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10396,13 +10551,11 @@ define void @s_shuffle_v4p0_v3p0__5_u_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10465,15 +10618,12 @@ define void @s_shuffle_v4p0_v3p0__5_0_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10531,14 +10681,10 @@ define void @s_shuffle_v4p0_v3p0__5_2_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10601,15 +10747,12 @@ define void @s_shuffle_v4p0_v3p0__5_3_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10671,12 +10814,9 @@ define void @s_shuffle_v4p0_v3p0__5_4_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10742,14 +10882,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10811,12 +10947,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10882,14 +11015,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10951,14 +11080,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11020,12 +11145,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11091,14 +11213,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11111,20 +11229,48 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_1() {
}
define void @s_shuffle_v4p0_v3p0__u_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__u_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__u_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__u_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -11132,20 +11278,48 @@ define void @s_shuffle_v4p0_v3p0__u_2_2_2() {
}
define void @s_shuffle_v4p0_v3p0__0_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__0_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__0_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__0_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__0_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -11153,22 +11327,53 @@ define void @s_shuffle_v4p0_v3p0__0_2_2_2() {
}
define void @s_shuffle_v4p0_v3p0__1_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__1_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__1_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__1_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -11176,22 +11381,53 @@ define void @s_shuffle_v4p0_v3p0__1_2_2_2() {
}
define void @s_shuffle_v4p0_v3p0__2_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__2_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__2_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__2_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__2_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -11199,20 +11435,48 @@ define void @s_shuffle_v4p0_v3p0__2_2_2_2() {
}
define void @s_shuffle_v4p0_v3p0__3_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__3_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__3_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__3_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -11269,12 +11533,9 @@ define void @s_shuffle_v4p0_v3p0__4_2_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11332,12 +11593,9 @@ define void @s_shuffle_v4p0_v3p0__5_2_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11391,10 +11649,8 @@ define void @s_shuffle_v4p0_v3p0__5_u_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11456,14 +11712,10 @@ define void @s_shuffle_v4p0_v3p0__5_0_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11517,10 +11769,8 @@ define void @s_shuffle_v4p0_v3p0__5_1_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11578,12 +11828,9 @@ define void @s_shuffle_v4p0_v3p0__5_3_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11645,12 +11892,9 @@ define void @s_shuffle_v4p0_v3p0__5_4_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11712,12 +11956,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11779,12 +12020,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11846,14 +12084,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11919,14 +12153,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11992,12 +12222,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12063,14 +12290,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12169,8 +12392,7 @@ define void @s_shuffle_v4p0_v3p0__1_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12210,8 +12432,7 @@ define void @s_shuffle_v4p0_v3p0__2_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12281,14 +12502,10 @@ define void @s_shuffle_v4p0_v3p0__4_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12341,14 +12558,10 @@ define void @s_shuffle_v4p0_v3p0__5_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12397,12 +12610,9 @@ define void @s_shuffle_v4p0_v3p0__5_u_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12465,15 +12675,12 @@ define void @s_shuffle_v4p0_v3p0__5_0_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12531,12 +12738,9 @@ define void @s_shuffle_v4p0_v3p0__5_1_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12598,14 +12802,10 @@ define void @s_shuffle_v4p0_v3p0__5_2_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:21]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s20
-; GFX942-NEXT: s_mov_b32 s9, s21
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s16
-; GFX942-NEXT: s_mov_b32 s13, s17
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12658,14 +12858,10 @@ define void @s_shuffle_v4p0_v3p0__5_4_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12722,14 +12918,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12782,12 +12974,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12853,14 +13042,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:21]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s20
-; GFX942-NEXT: s_mov_b32 s9, s21
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12926,14 +13111,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:21]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s20
-; GFX942-NEXT: s_mov_b32 s9, s21
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12995,12 +13176,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13057,14 +13235,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13077,20 +13251,48 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_3() {
}
define void @s_shuffle_v4p0_v3p0__u_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__u_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__u_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__u_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
@@ -13144,12 +13346,9 @@ define void @s_shuffle_v4p0_v3p0__0_4_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13211,12 +13410,9 @@ define void @s_shuffle_v4p0_v3p0__1_4_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13278,12 +13474,9 @@ define void @s_shuffle_v4p0_v3p0__2_4_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13296,20 +13489,48 @@ define void @s_shuffle_v4p0_v3p0__2_4_4_4() {
}
define void @s_shuffle_v4p0_v3p0__3_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__3_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__3_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__3_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
@@ -13318,22 +13539,53 @@ define void @s_shuffle_v4p0_v3p0__3_4_4_4() {
}
define void @s_shuffle_v4p0_v3p0__4_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__4_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__4_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__4_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__4_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
@@ -13342,22 +13594,53 @@ define void @s_shuffle_v4p0_v3p0__4_4_4_4() {
}
define void @s_shuffle_v4p0_v3p0__5_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
@@ -13402,12 +13685,9 @@ define void @s_shuffle_v4p0_v3p0__5_u_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13466,15 +13746,12 @@ define void @s_shuffle_v4p0_v3p0__5_0_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13532,12 +13809,9 @@ define void @s_shuffle_v4p0_v3p0__5_1_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13599,12 +13873,9 @@ define void @s_shuffle_v4p0_v3p0__5_2_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13657,14 +13928,10 @@ define void @s_shuffle_v4p0_v3p0__5_3_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13717,14 +13984,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13773,12 +14036,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13840,12 +14100,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13907,12 +14164,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13974,12 +14228,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14028,14 +14279,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14048,20 +14295,48 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_4() {
}
define void @s_shuffle_v4p0_v3p0__u_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__u_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__u_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__u_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
@@ -14119,12 +14394,9 @@ define void @s_shuffle_v4p0_v3p0__0_5_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14186,12 +14458,9 @@ define void @s_shuffle_v4p0_v3p0__1_5_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14253,12 +14522,9 @@ define void @s_shuffle_v4p0_v3p0__2_5_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14271,20 +14537,48 @@ define void @s_shuffle_v4p0_v3p0__2_5_5_5() {
}
define void @s_shuffle_v4p0_v3p0__3_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__3_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__3_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__3_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
@@ -14293,22 +14587,53 @@ define void @s_shuffle_v4p0_v3p0__3_5_5_5() {
}
define void @s_shuffle_v4p0_v3p0__4_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__4_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__4_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__4_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__4_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
@@ -14317,20 +14642,48 @@ define void @s_shuffle_v4p0_v3p0__4_5_5_5() {
}
define void @s_shuffle_v4p0_v3p0__5_u_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_u_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_u_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_u_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_u_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 poison, i32 5, i32 5>
@@ -14388,12 +14741,9 @@ define void @s_shuffle_v4p0_v3p0__5_0_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14455,12 +14805,9 @@ define void @s_shuffle_v4p0_v3p0__5_1_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14522,12 +14869,9 @@ define void @s_shuffle_v4p0_v3p0__5_2_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14584,14 +14928,10 @@ define void @s_shuffle_v4p0_v3p0__5_3_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14604,20 +14944,48 @@ define void @s_shuffle_v4p0_v3p0__5_3_5_5() {
}
define void @s_shuffle_v4p0_v3p0__5_4_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_4_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_4_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_4_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_4_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 5, i32 5>
@@ -14666,12 +15034,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14737,14 +15102,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14810,14 +15171,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14879,12 +15236,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14937,14 +15291,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15001,14 +15351,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll
index ce1c54129f706..5e61b0b51e280 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll
@@ -13289,8 +13289,7 @@ define void @s_shuffle_v4p0_v4p0__1_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13330,8 +13329,7 @@ define void @s_shuffle_v4p0_v4p0__2_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13375,8 +13373,7 @@ define void @s_shuffle_v4p0_v4p0__3_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13434,8 +13431,7 @@ define void @s_shuffle_v4p0_v4p0__5_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13476,8 +13472,7 @@ define void @s_shuffle_v4p0_v4p0__6_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13522,8 +13517,7 @@ define void @s_shuffle_v4p0_v4p0__7_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13582,10 +13576,8 @@ define void @s_shuffle_v4p0_v4p0__7_0_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13639,8 +13631,7 @@ define void @s_shuffle_v4p0_v4p0__7_1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13698,10 +13689,8 @@ define void @s_shuffle_v4p0_v4p0__7_2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13755,10 +13744,8 @@ define void @s_shuffle_v4p0_v4p0__7_3_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13807,10 +13794,8 @@ define void @s_shuffle_v4p0_v4p0__7_4_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13823,18 +13808,43 @@ define void @s_shuffle_v4p0_v4p0__7_4_u_u() {
}
define void @s_shuffle_v4p0_v4p0__7_5_u_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_5_u_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_u_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_u_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_u_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 poison, i32 poison>
@@ -13879,10 +13889,8 @@ define void @s_shuffle_v4p0_v4p0__7_6_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13927,10 +13935,8 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13989,13 +13995,11 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14054,13 +14058,11 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14114,10 +14116,8 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14175,12 +14175,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14229,12 +14226,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14283,12 +14277,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14301,20 +14292,48 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_u() {
}
define void @s_shuffle_v4p0_v4p0__7_7_6_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_6_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 poison>
@@ -14363,12 +14382,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14434,14 +14450,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14507,14 +14519,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14580,14 +14588,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14649,12 +14653,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14711,14 +14712,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14771,14 +14768,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14835,14 +14828,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14855,22 +14844,53 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_6() {
}
define void @s_shuffle_v4p0_v4p0__7_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
@@ -14919,12 +14939,9 @@ define void @s_shuffle_v4p0_v4p0__u_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14936,22 +14953,53 @@ define void @s_shuffle_v4p0_v4p0__u_0_0_0() {
}
define void @s_shuffle_v4p0_v4p0__0_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: s_mov_b32 s14, s8
-; GFX9-NEXT: s_mov_b32 s15, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -15003,14 +15051,10 @@ define void @s_shuffle_v4p0_v4p0__1_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15062,14 +15106,10 @@ define void @s_shuffle_v4p0_v4p0__2_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15125,14 +15165,10 @@ define void @s_shuffle_v4p0_v4p0__3_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15184,12 +15220,9 @@ define void @s_shuffle_v4p0_v4p0__4_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15251,17 +15284,13 @@ define void @s_shuffle_v4p0_v4p0__5_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15324,15 +15353,12 @@ define void @s_shuffle_v4p0_v4p0__6_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15395,17 +15421,13 @@ define void @s_shuffle_v4p0_v4p0__7_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15464,15 +15486,12 @@ define void @s_shuffle_v4p0_v4p0__7_u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15535,17 +15554,13 @@ define void @s_shuffle_v4p0_v4p0__7_1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15611,14 +15626,10 @@ define void @s_shuffle_v4p0_v4p0__7_2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15680,14 +15691,10 @@ define void @s_shuffle_v4p0_v4p0__7_3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15750,17 +15757,13 @@ define void @s_shuffle_v4p0_v4p0__7_4_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15822,12 +15825,9 @@ define void @s_shuffle_v4p0_v4p0__7_5_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15893,14 +15893,10 @@ define void @s_shuffle_v4p0_v4p0__7_6_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15963,15 +15959,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16030,13 +16023,11 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16099,15 +16090,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16169,14 +16157,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16238,14 +16222,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16304,15 +16284,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16375,15 +16352,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16445,12 +16419,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16463,20 +16434,48 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_0() {
}
define void @s_shuffle_v4p0_v4p0__u_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__u_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__u_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -16484,89 +16483,210 @@ define void @s_shuffle_v4p0_v4p0__u_1_1_1() {
}
define void @s_shuffle_v4p0_v4p0__0_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__1_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__1_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__2_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__2_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__1_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__2_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
define void @s_shuffle_v4p0_v4p0__3_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__3_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -16574,20 +16694,48 @@ define void @s_shuffle_v4p0_v4p0__3_1_1_1() {
}
define void @s_shuffle_v4p0_v4p0__4_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__4_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__4_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -16644,12 +16792,9 @@ define void @s_shuffle_v4p0_v4p0__5_1_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16711,12 +16856,9 @@ define void @s_shuffle_v4p0_v4p0__6_1_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16778,12 +16920,9 @@ define void @s_shuffle_v4p0_v4p0__7_1_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16842,15 +16981,12 @@ define void @s_shuffle_v4p0_v4p0__7_u_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16913,17 +17049,13 @@ define void @s_shuffle_v4p0_v4p0__7_0_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16985,14 +17117,10 @@ define void @s_shuffle_v4p0_v4p0__7_2_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17054,14 +17182,10 @@ define void @s_shuffle_v4p0_v4p0__7_3_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17124,17 +17248,13 @@ define void @s_shuffle_v4p0_v4p0__7_4_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17196,12 +17316,9 @@ define void @s_shuffle_v4p0_v4p0__7_5_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17267,14 +17384,10 @@ define void @s_shuffle_v4p0_v4p0__7_6_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17337,15 +17450,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17404,13 +17514,11 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17473,15 +17581,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17539,14 +17644,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17604,14 +17705,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17670,15 +17767,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17741,15 +17835,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17811,12 +17902,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -17829,149 +17917,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_1() {
}
define void @s_shuffle_v4p0_v4p0__u_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__0_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__1_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__1_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__2_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__2_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__3_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__3_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__4_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__5_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2:
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__u_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -17981,17 +17932,12 @@ define void @s_shuffle_v4p0_v4p0__5_2_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__u_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
@@ -18001,42 +17947,31 @@ define void @s_shuffle_v4p0_v4p0__5_2_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__6_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2:
+define void @s_shuffle_v4p0_v4p0__0_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -18046,15 +17981,12 @@ define void @s_shuffle_v4p0_v4p0__6_2_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
@@ -18064,42 +17996,31 @@ define void @s_shuffle_v4p0_v4p0__6_2_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2:
+define void @s_shuffle_v4p0_v4p0__1_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s12
@@ -18111,15 +18032,12 @@ define void @s_shuffle_v4p0_v4p0__7_2_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s12
@@ -18131,44 +18049,36 @@ define void @s_shuffle_v4p0_v4p0__7_2_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_u_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2:
+define void @s_shuffle_v4p0_v4p0__2_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18176,17 +18086,16 @@ define void @s_shuffle_v4p0_v4p0__7_u_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18194,115 +18103,88 @@ define void @s_shuffle_v4p0_v4p0__7_u_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_0_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2:
+define void @s_shuffle_v4p0_v4p0__3_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_1_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2:
+define void @s_shuffle_v4p0_v4p0__4_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__4_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18310,17 +18192,14 @@ define void @s_shuffle_v4p0_v4p0__7_1_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__4_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18328,32 +18207,26 @@ define void @s_shuffle_v4p0_v4p0__7_1_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_3_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2:
+define void @s_shuffle_v4p0_v4p0__5_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -18362,10 +18235,10 @@ define void @s_shuffle_v4p0_v4p0__7_3_2_2() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18373,7 +18246,7 @@ define void @s_shuffle_v4p0_v4p0__7_3_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -18382,10 +18255,10 @@ define void @s_shuffle_v4p0_v4p0__7_3_2_2() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18393,7 +18266,7 @@ define void @s_shuffle_v4p0_v4p0__7_3_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18402,25 +18275,22 @@ define void @s_shuffle_v4p0_v4p0__7_3_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_4_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2:
+define void @s_shuffle_v4p0_v4p0__6_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -18429,10 +18299,8 @@ define void @s_shuffle_v4p0_v4p0__7_4_2_2() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18440,7 +18308,7 @@ define void @s_shuffle_v4p0_v4p0__7_4_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -18449,10 +18317,8 @@ define void @s_shuffle_v4p0_v4p0__7_4_2_2() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18460,7 +18326,7 @@ define void @s_shuffle_v4p0_v4p0__7_4_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18469,65 +18335,62 @@ define void @s_shuffle_v4p0_v4p0__7_4_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_5_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2:
+define void @s_shuffle_v4p0_v4p0__7_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18536,37 +18399,32 @@ define void @s_shuffle_v4p0_v4p0__7_5_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_6_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2:
+define void @s_shuffle_v4p0_v4p0__7_u_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18574,19 +18432,17 @@ define void @s_shuffle_v4p0_v4p0__7_6_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18594,7 +18450,7 @@ define void @s_shuffle_v4p0_v4p0__7_6_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18603,61 +18459,65 @@ define void @s_shuffle_v4p0_v4p0__7_6_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2:
+define void @s_shuffle_v4p0_v4p0__7_0_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18666,35 +18526,33 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_u_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2:
+define void @s_shuffle_v4p0_v4p0__7_1_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18702,17 +18560,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18720,7 +18578,7 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18729,61 +18587,61 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_0_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2:
+define void @s_shuffle_v4p0_v4p0__7_3_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18792,67 +18650,62 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_1_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2:
+define void @s_shuffle_v4p0_v4p0__7_4_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18861,39 +18714,34 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_3_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2:
+define void @s_shuffle_v4p0_v4p0__7_5_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: s_mov_b32 s14, s16
; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
@@ -18901,19 +18749,19 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: s_mov_b32 s14, s16
; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
@@ -18921,7 +18769,7 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18930,203 +18778,182 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_4_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2:
+define void @s_shuffle_v4p0_v4p0__7_6_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_5_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2:
+define void @s_shuffle_v4p0_v4p0__7_7_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_6_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2:
+define void @s_shuffle_v4p0_v4p0__7_7_u_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19135,197 +18962,58 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__u_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__0_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__1_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__1_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__2_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__2_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__3_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__3_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__4_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__5_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3:
+define void @s_shuffle_v4p0_v4p0__7_7_0_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19334,61 +19022,63 @@ define void @s_shuffle_v4p0_v4p0__5_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__6_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3:
+define void @s_shuffle_v4p0_v4p0__7_7_1_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19397,65 +19087,63 @@ define void @s_shuffle_v4p0_v4p0__6_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3:
+define void @s_shuffle_v4p0_v4p0__7_7_3_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19464,86 +19152,87 @@ define void @s_shuffle_v4p0_v4p0__7_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_u_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3:
+define void @s_shuffle_v4p0_v4p0__7_7_4_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_0_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3:
+define void @s_shuffle_v4p0_v4p0__7_7_5_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -19554,18 +19243,16 @@ define void @s_shuffle_v4p0_v4p0__7_0_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -19576,83 +19263,81 @@ define void @s_shuffle_v4p0_v4p0__7_0_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_1_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3:
+define void @s_shuffle_v4p0_v4p0__7_7_6_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19661,35 +19346,29 @@ define void @s_shuffle_v4p0_v4p0__7_1_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_2_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3:
+define void @s_shuffle_v4p0_v4p0__u_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__u_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -19697,19 +19376,14 @@ define void @s_shuffle_v4p0_v4p0__7_2_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__u_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -19717,46 +19391,33 @@ define void @s_shuffle_v4p0_v4p0__7_2_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_4_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3:
+define void @s_shuffle_v4p0_v4p0__0_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -19764,19 +19425,14 @@ define void @s_shuffle_v4p0_v4p0__7_4_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -19784,113 +19440,89 @@ define void @s_shuffle_v4p0_v4p0__7_4_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_5_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3:
+define void @s_shuffle_v4p0_v4p0__1_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_6_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3:
+define void @s_shuffle_v4p0_v4p0__2_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -19898,19 +19530,16 @@ define void @s_shuffle_v4p0_v4p0__7_6_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -19918,44 +19547,36 @@ define void @s_shuffle_v4p0_v4p0__7_6_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3:
+define void @s_shuffle_v4p0_v4p0__3_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -19963,17 +19584,16 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -19981,127 +19601,116 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_u_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3:
+define void @s_shuffle_v4p0_v4p0__4_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__4_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__4_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_0_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3:
+define void @s_shuffle_v4p0_v4p0__5_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20110,67 +19719,58 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_1_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3:
+define void @s_shuffle_v4p0_v4p0__6_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20179,27 +19779,22 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_2_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3:
+define void @s_shuffle_v4p0_v4p0__7_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -20210,12 +19805,16 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -20226,12 +19825,16 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20240,23 +19843,22 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_4_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3:
+define void @s_shuffle_v4p0_v4p0__7_u_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -20267,14 +19869,14 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -20285,14 +19887,14 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20301,61 +19903,65 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_5_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3:
+define void @s_shuffle_v4p0_v4p0__7_0_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20364,65 +19970,59 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_6_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3:
+define void @s_shuffle_v4p0_v4p0__7_1_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20431,584 +20031,609 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__u_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__0_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4:
+define void @s_shuffle_v4p0_v4p0__7_2_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__1_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4:
+define void @s_shuffle_v4p0_v4p0__7_4_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4:
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__2_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4:
+define void @s_shuffle_v4p0_v4p0__7_5_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__3_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4:
+define void @s_shuffle_v4p0_v4p0__7_6_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__4_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__5_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4:
+define void @s_shuffle_v4p0_v4p0__7_7_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__6_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4:
+define void @s_shuffle_v4p0_v4p0__7_7_u_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4:
+define void @s_shuffle_v4p0_v4p0__7_7_0_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_u_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4:
+define void @s_shuffle_v4p0_v4p0__7_7_1_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_0_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4:
+define void @s_shuffle_v4p0_v4p0__7_7_2_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_1_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4:
+define void @s_shuffle_v4p0_v4p0__7_7_4_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -21017,25 +20642,22 @@ define void @s_shuffle_v4p0_v4p0__7_1_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_2_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4:
+define void @s_shuffle_v4p0_v4p0__7_7_5_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -21046,18 +20668,14 @@ define void @s_shuffle_v4p0_v4p0__7_2_4_4() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -21068,304 +20686,240 @@ define void @s_shuffle_v4p0_v4p0__7_2_4_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s16
-; GFX942-NEXT: s_mov_b32 s13, s17
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_3_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4:
+define void @s_shuffle_v4p0_v4p0__7_7_6_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s16
-; GFX942-NEXT: s_mov_b32 s13, s17
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_5_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4:
+define void @s_shuffle_v4p0_v4p0__u_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__0_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_6_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4:
+define void @s_shuffle_v4p0_v4p0__1_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4:
+define void @s_shuffle_v4p0_v4p0__2_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_u_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4:
+define void @s_shuffle_v4p0_v4p0__3_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -21373,14 +20927,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_4() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -21388,190 +20940,171 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_0_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4:
+define void @s_shuffle_v4p0_v4p0__4_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__5_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s4
; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s4
; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_1_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4:
+define void @s_shuffle_v4p0_v4p0__6_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_2_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4:
+define void @s_shuffle_v4p0_v4p0__7_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: s_mov_b32 s14, s4
; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -21579,17 +21112,18 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_4() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: s_mov_b32 s14, s4
; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -21597,46 +21131,38 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_3_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4:
+define void @s_shuffle_v4p0_v4p0__7_u_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: s_mov_b32 s14, s4
; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
@@ -21644,19 +21170,16 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_4() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: s_mov_b32 s14, s4
; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -21664,107 +21187,110 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:23]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s22
-; GFX942-NEXT: s_mov_b32 s11, s23
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_5_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4:
+define void @s_shuffle_v4p0_v4p0__7_0_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_6_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4:
+define void @s_shuffle_v4p0_v4p0__7_1_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s22
; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s20
-; GFX900-NEXT: s_mov_b32 s13, s21
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: s_mov_b32 s14, s16
; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
@@ -21772,18 +21298,19 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_4() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s22
; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s20
-; GFX90A-NEXT: s_mov_b32 s13, s21
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: s_mov_b32 s14, s16
; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
@@ -21791,65 +21318,1353 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_2_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_3_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_5_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_6_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_u_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_0_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_1_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_2_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_3_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_5_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_6_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s12, s20
+; GFX900-NEXT: s_mov_b32 s13, s21
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s12, s20
+; GFX90A-NEXT: s_mov_b32 s13, s21
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
define void @s_shuffle_v4p0_v4p0__u_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__u_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__u_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__0_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__1_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__2_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__3_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__4_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__4_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__4_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__5_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__6_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_u_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__0_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5:
+define void @s_shuffle_v4p0_v4p0__7_0_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -21857,17 +22672,19 @@ define void @s_shuffle_v4p0_v4p0__0_5_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -21875,74 +22692,69 @@ define void @s_shuffle_v4p0_v4p0__0_5_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__1_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5:
+define void @s_shuffle_v4p0_v4p0__7_1_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -21951,253 +22763,150 @@ define void @s_shuffle_v4p0_v4p0__1_5_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__2_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5:
+define void @s_shuffle_v4p0_v4p0__7_2_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__3_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5:
+define void @s_shuffle_v4p0_v4p0__7_3_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__4_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__5_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__5_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__6_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__6_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_u_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5:
+define void @s_shuffle_v4p0_v4p0__7_4_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -22205,6 +22914,8 @@ define void @s_shuffle_v4p0_v4p0__7_u_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
@@ -22214,7 +22925,7 @@ define void @s_shuffle_v4p0_v4p0__7_u_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -22222,6 +22933,8 @@ define void @s_shuffle_v4p0_v4p0__7_u_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
@@ -22231,43 +22944,38 @@ define void @s_shuffle_v4p0_v4p0__7_u_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_0_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5:
+define void @s_shuffle_v4p0_v4p0__7_6_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -22275,19 +22983,16 @@ define void @s_shuffle_v4p0_v4p0__7_0_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -22295,114 +23000,92 @@ define void @s_shuffle_v4p0_v4p0__7_0_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_1_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5:
+define void @s_shuffle_v4p0_v4p0__7_7_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_2_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5:
+define void @s_shuffle_v4p0_v4p0__7_7_u_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -22410,21 +23093,14 @@ define void @s_shuffle_v4p0_v4p0__7_2_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -22432,34 +23108,28 @@ define void @s_shuffle_v4p0_v4p0__7_2_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_3_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5:
+define void @s_shuffle_v4p0_v4p0__7_7_0_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -22470,14 +23140,16 @@ define void @s_shuffle_v4p0_v4p0__7_3_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -22488,174 +23160,176 @@ define void @s_shuffle_v4p0_v4p0__7_3_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_4_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5:
+define void @s_shuffle_v4p0_v4p0__7_7_1_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_6_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5:
+define void @s_shuffle_v4p0_v4p0__7_7_2_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5:
+define void @s_shuffle_v4p0_v4p0__7_7_3_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -22663,16 +23337,19 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -22680,33 +23357,31 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_u_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5:
+define void @s_shuffle_v4p0_v4p0__7_7_4_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -22714,6 +23389,8 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -22721,7 +23398,7 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -22729,6 +23406,8 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -22736,201 +23415,175 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_0_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5:
+define void @s_shuffle_v4p0_v4p0__7_7_6_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s10, s18
; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s10, s18
; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_1_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5:
+define void @s_shuffle_v4p0_v4p0__u_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__u_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__u_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 poison, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_2_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5:
+define void @s_shuffle_v4p0_v4p0__0_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -22939,311 +23592,271 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_3_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5:
+define void @s_shuffle_v4p0_v4p0__1_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_4_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5:
+define void @s_shuffle_v4p0_v4p0__2_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_6_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5:
+define void @s_shuffle_v4p0_v4p0__3_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__u_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 poison, i32 6, i32 6, i32 6>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__0_6_6_6() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6:
+define void @s_shuffle_v4p0_v4p0__4_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__4_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__4_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 4, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__1_6_6_6() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6:
+define void @s_shuffle_v4p0_v4p0__5_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -23253,17 +23866,14 @@ define void @s_shuffle_v4p0_v4p0__1_6_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
@@ -23273,44 +23883,35 @@ define void @s_shuffle_v4p0_v4p0__1_6_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__2_6_6_6() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6:
+define void @s_shuffle_v4p0_v4p0__6_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -23320,17 +23921,14 @@ define void @s_shuffle_v4p0_v4p0__2_6_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
@@ -23340,44 +23938,35 @@ define void @s_shuffle_v4p0_v4p0__2_6_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__3_6_6_6() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6:
+define void @s_shuffle_v4p0_v4p0__7_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -23387,17 +23976,14 @@ define void @s_shuffle_v4p0_v4p0__3_6_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
@@ -23407,119 +23993,19 @@ define void @s_shuffle_v4p0_v4p0__3_6_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__4_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 4, i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__5_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__5_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__6_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__6_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 6, i32 6>
@@ -23528,20 +24014,48 @@ define void @s_shuffle_v4p0_v4p0__7_6_6_6() {
}
define void @s_shuffle_v4p0_v4p0__7_u_6_6() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_u_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_6_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 6, i32 6>
@@ -23599,12 +24113,9 @@ define void @s_shuffle_v4p0_v4p0__7_0_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23666,12 +24177,9 @@ define void @s_shuffle_v4p0_v4p0__7_1_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23733,12 +24241,9 @@ define void @s_shuffle_v4p0_v4p0__7_2_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23800,12 +24305,9 @@ define void @s_shuffle_v4p0_v4p0__7_3_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23862,14 +24364,10 @@ define void @s_shuffle_v4p0_v4p0__7_4_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23882,20 +24380,48 @@ define void @s_shuffle_v4p0_v4p0__7_4_6_6() {
}
define void @s_shuffle_v4p0_v4p0__7_5_6_6() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_5_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_6_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 6, i32 6>
@@ -23904,22 +24430,53 @@ define void @s_shuffle_v4p0_v4p0__7_5_6_6() {
}
define void @s_shuffle_v4p0_v4p0__7_7_6_6() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 6>
@@ -23968,12 +24525,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24039,14 +24593,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24112,14 +24662,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24181,12 +24727,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24252,14 +24795,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24312,14 +24851,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24376,14 +24911,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24396,20 +24927,48 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_6() {
}
define void @s_shuffle_v4p0_v4p0__u_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__u_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__u_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 poison, i32 7, i32 7, i32 7>
@@ -24467,12 +25026,9 @@ define void @s_shuffle_v4p0_v4p0__0_7_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24534,12 +25090,9 @@ define void @s_shuffle_v4p0_v4p0__1_7_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24601,12 +25154,9 @@ define void @s_shuffle_v4p0_v4p0__2_7_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24659,117 +25209,232 @@ define void @s_shuffle_v4p0_v4p0__3_7_7_7() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_7_7_7:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__4_7_7_7() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__4_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__4_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 4, i32 7, i32 7, i32 7>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__5_7_7_7() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 7, i32 7, i32 7>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__6_7_7_7() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 7, i32 7, i32 7>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_u_7_7() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__4_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 4, i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__5_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__5_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__6_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__6_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_u_7_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_u_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 7, i32 7>
@@ -24827,12 +25492,9 @@ define void @s_shuffle_v4p0_v4p0__7_0_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24894,12 +25556,9 @@ define void @s_shuffle_v4p0_v4p0__7_1_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24961,12 +25620,9 @@ define void @s_shuffle_v4p0_v4p0__7_2_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25028,12 +25684,9 @@ define void @s_shuffle_v4p0_v4p0__7_3_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25090,14 +25743,10 @@ define void @s_shuffle_v4p0_v4p0__7_4_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25110,20 +25759,48 @@ define void @s_shuffle_v4p0_v4p0__7_4_7_7() {
}
define void @s_shuffle_v4p0_v4p0__7_5_7_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_5_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 7, i32 7>
@@ -25132,22 +25809,53 @@ define void @s_shuffle_v4p0_v4p0__7_5_7_7() {
}
define void @s_shuffle_v4p0_v4p0__7_6_7_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_6_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 7, i32 7>
@@ -25156,20 +25864,48 @@ define void @s_shuffle_v4p0_v4p0__7_6_7_7() {
}
define void @s_shuffle_v4p0_v4p0__7_7_u_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_u_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 7>
@@ -25227,12 +25963,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25294,12 +26027,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25361,12 +26091,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25428,12 +26155,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25486,14 +26210,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25550,14 +26270,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25570,20 +26286,48 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_7() {
}
define void @s_shuffle_v4p0_v4p0__7_7_6_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_6_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 7>
diff --git a/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll b/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll
index 1e042d3b4a31f..69773bf265e8c 100644
--- a/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll
@@ -6,25 +6,22 @@ define protected amdgpu_kernel void @test(ptr addrspace(1) %in, ptr addrspace(1)
; GFX942-LABEL: test:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], 0
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
-; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3]
-; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1]
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v12, s4
-; GFX942-NEXT: v_mov_b32_e32 v13, s5
-; GFX942-NEXT: v_mov_b32_e32 v4, s6
-; GFX942-NEXT: v_mov_b32_e32 v5, s7
-; GFX942-NEXT: v_mov_b32_e32 v6, s7
-; GFX942-NEXT: v_mov_b32_e32 v7, s7
+; GFX942-NEXT: v_mov_b32_e32 v8, s4
+; GFX942-NEXT: v_mov_b32_e32 v9, s5
+; GFX942-NEXT: v_mov_b32_e32 v0, s6
+; GFX942-NEXT: v_mov_b32_e32 v1, s7
+; GFX942-NEXT: v_mov_b32_e32 v2, s7
+; GFX942-NEXT: v_mov_b32_e32 v3, s7
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_smfmac_i32_16x16x64_i8 v[8:11], v[12:13], v[4:7], v13
+; GFX942-NEXT: v_smfmac_i32_16x16x64_i8 v[4:7], v[8:9], v[0:3], v9
; GFX942-NEXT: s_nop 6
-; GFX942-NEXT: global_store_dword v0, v11, s[2:3] offset:12
+; GFX942-NEXT: global_store_dword v10, v7, s[2:3] offset:12
; GFX942-NEXT: s_endpgm
entry:
%arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 0
diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
index b31cc36a5f7c6..1d50d96f43f58 100644
--- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
@@ -263,24 +263,24 @@ define amdgpu_kernel void @s_uint_to_fp_v4i64_to_v4f64(ptr addrspace(1) %out, <4
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s11
-; GFX942-NEXT: v_cvt_f64_u32_e32 v[2:3], s10
-; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s9
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s15
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[2:3], s14
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s13
; GFX942-NEXT: v_ldexp_f64 v[0:1], v[0:1], 32
; GFX942-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3]
; GFX942-NEXT: v_ldexp_f64 v[0:1], v[4:5], 32
-; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s8
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s12
; GFX942-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
-; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s15
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s11
; GFX942-NEXT: v_ldexp_f64 v[4:5], v[4:5], 32
-; GFX942-NEXT: v_cvt_f64_u32_e32 v[6:7], s14
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[6:7], s10
; GFX942-NEXT: v_add_f64 v[6:7], v[4:5], v[6:7]
-; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s13
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s9
; GFX942-NEXT: v_ldexp_f64 v[4:5], v[4:5], 32
-; GFX942-NEXT: v_cvt_f64_u32_e32 v[8:9], s12
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[8:9], s8
; GFX942-NEXT: v_add_f64 v[4:5], v[4:5], v[8:9]
-; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_endpgm
%cast = uitofp <4 x i64> %in to <4 x double>
store <4 x double> %cast, ptr addrspace(1) %out, align 16
@@ -412,12 +412,12 @@ define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f64(ptr addrspace(1) %out, <4
; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-NEXT: v_cvt_f64_u32_e32 v[6:7], s3
-; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s2
-; GFX942-NEXT: v_cvt_f64_u32_e32 v[2:3], s1
-; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[6:7] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[6:7]
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[2:3], s3
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s2
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[6:7], s1
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s0
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[6:7] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[6:7]
; GFX942-NEXT: s_endpgm
%cast = uitofp <4 x i32> %in to <4 x double>
store <4 x double> %cast, ptr addrspace(1) %out, align 16
diff --git a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
index b045c761436de..a81c7702e02d7 100644
--- a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
+++ b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
@@ -459,11 +459,10 @@ define amdgpu_kernel void @v8i8_phi_zeroinit(ptr addrspace(1) %src1, ptr addrspa
; GFX942-NEXT: ; %bb.1: ; %bb.1
; GFX942-NEXT: global_load_dwordx2 v[4:5], v1, s[10:11]
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 7, v0
-; GFX942-NEXT: s_waitcnt vmcnt(1)
-; GFX942-NEXT: v_mov_b32_e32 v2, 0
; GFX942-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX942-NEXT: s_and_b64 s[4:5], vcc, exec
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: s_waitcnt vmcnt(1)
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], 0
; GFX942-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
; GFX942-NEXT: .LBB9_2: ; %Flow
; GFX942-NEXT: s_or_b64 exec, exec, s[2:3]
More information about the llvm-commits
mailing list